Changelog
The dataset contains high frequency, low frequency and raw data of 6 households in the USA. We choose House 2 for our analysis as it contains the least number of appliances. Further, we choose to do the analysis of the low frequency data. This house contains the following appliances/circuits:
These circuits are sampled once every 3-4 seconds. Also, the house contains 2 mains which are sampled at 1 Hz.
# Display the NILM problem-space figure inline in the notebook.
from IPython.core.display import Image
Image(filename='nilm_space.png')
The above diagram shall remain a chief motivation in our analysis. We will try and make a case of impactful disaggregation.
In this section we setup the basic imports which shall be required for performing the analysis.
# Notebook-wide imports and matplotlib configuration.
import numpy as np
import matplotlib.pyplot as plt
import math
import datetime  # used below for epoch conversion and date-range construction
from scipy.cluster.vq import kmeans, vq
import itertools
import matplotlib
# Setting size of figures as 20*10.
# NOTE(review): the original called plt.figsize(20, 10), which is not a pyplot
# function (it was an IPython %pylab-mode global); set rcParams instead.
matplotlib.rcParams['figure.figsize'] = (20, 10)
# Setting font size to be 16 (immediately overridden to 20 below).
matplotlib.rcParams.update({'font.size': 16})
import pandas as pd
from pandas import Series, DataFrame
import json
#s = json.load( open("/home/nipun/bmh_matplotlibrc.json") )
#matplotlib.rcParams.update(s)
matplotlib.rcParams.update({'font.size': 20})
import time
# Peek at the raw REDD low-frequency data files (IPython shell escapes).
!head /home/nipun/study/datasets/MIT/low_freq/house_2/channel_1.dat
!cat /home/nipun/study/datasets/MIT/low_freq/house_2/channel_2.dat|grep 1303431955
import pytz
# Interpret a sample epoch timestamp in EST vs. the machine's local timezone.
# NOTE(review): relies on `datetime` being imported elsewhere in the notebook.
datetime.datetime.fromtimestamp(1303082307,pytz.timezone("EST"))
datetime.datetime.fromtimestamp(1303082307)
# Load the two mains channels (column 0: epoch timestamp, column 1: power --
# see the column slicing below) and time how long the load takes.
start_time=time.time()
mains_1_data=np.loadtxt('/home/nipun/study/datasets/MIT/low_freq/house_2/channel_1.dat')
mains_2_data=np.loadtxt('/home/nipun/study/datasets/MIT/low_freq/house_2/channel_2.dat')
end_time=time.time()
print "Time taken to load Mains data= "+str(end_time-start_time)+" seconds"
We can observe that data is missing towards the end. As a part of the data cleansing process we should eliminate the last indices. Next we find the last valid index for which contiguous data is present. This corresponds to an epoch timestamp of 1304282291.
#upper=np.where(mains_1_data[:,0]==1304282291.0)[0]
#lower=np.where(mains_1_data[:,0]>1303084800.0)[0][0]
# Column 0 is the epoch timestamp, column 1 the power reading.
mains_1_power=mains_1_data[:,1]
mains_2_power=mains_2_data[:,1]
timestamp=mains_1_data[:,0]
# Convert epoch seconds to numpy datetime64 values.
timestamp_mains_date=timestamp.astype('datetime64[s]')
timestamp_mains_date
# Shift by 9.5 hours -- presumably to move to local time; TODO confirm offset.
timestamp_2=timestamp-9.5*60*60
timestamp_mains_date_2=timestamp_2.astype('datetime64[s]')
timestamp_mains_date_2
Overall statistics about the dataset.
# Both mains power series indexed by the unshifted timestamps.
df_mains=DataFrame({'Mains_1_Power':mains_1_power,'Mains_2_Power':mains_2_power},index=timestamp_mains_date)
df_mains.describe()
# Same data, indexed by the 9.5-hour-shifted timestamps.
df_mains_2=DataFrame({'Mains_1_Power':mains_1_power,'Mains_2_Power':mains_2_power},index=timestamp_mains_date_2)
Plotting overall power consumption for the two main circuits.
# Plot both mains power traces and time the plotting.
start_time=time.time()
df_mains.plot(subplots=True,title='Power consumption');
end_time=time.time()
print "Time taken to plot Mains data= "+str(end_time-start_time)+" seconds"
# Daily energy: summing the 1 Hz watt samples (per the dataset description)
# and dividing by 1000*3600 converts watt-seconds to kWh.
start_time=time.time()
df_1_day=df_mains.resample('1d',how='sum');
df_1_day_energy=DataFrame(index=df_1_day.index);
df_1_day_energy['Mains_1_Energy']=Series(df_1_day.Mains_1_Power/(1000*3600),index=df_1_day.index)
df_1_day_energy['Mains_2_Energy']=Series(df_1_day.Mains_2_Power/(1000*3600),index=df_1_day.index)
end_time=time.time()
print "Time taken to resample Mains data= "+str(end_time-start_time)+" seconds"
#figsize(20,10)
# Hourly view of a single day (the 27th), under both timestamp conventions.
df_20=df_mains[df_mains.index.day==27]
df_20_hourly=df_20.resample('1h')
print df_20_hourly.describe()
df_20_hourly.plot(kind='bar',subplots=True);
df_20_2=df_mains_2[df_mains_2.index.day==27]
df_20_hourly_2=df_20_2.resample('1h')
print df_20_hourly_2.describe()
df_20_hourly_2.plot(kind='bar',subplots=True);
Energy consumption (KWh) statistics about data.
df_1_day_energy.describe()
Plotting daily energy consumption.
def correct_labels(ax):
    """Rewrite the x tick labels from "YYYY-MM-DD ..." to "DD\\nMon"."""
    month_names = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    # Each tick label starts with an ISO date; drop the time part, split Y-M-D.
    tick_texts = [tick.get_text() for tick in ax.get_xticklabels()]
    date_parts = [text.split(" ")[0].split("-") for text in tick_texts]
    new_labels = [ymd[2] + "\n" + month_names[int(ymd[1]) - 1] for ymd in date_parts]
    ax.set_xticklabels(new_labels)
Function to find which of the days are weekdays and their indices
def find_weekend_indices(datetime_array):
    """Return the positions in datetime_array that fall on a Saturday or Sunday.

    weekday() numbers Monday as 0, so >= 5 selects Saturday (5) and Sunday (6).
    """
    return [pos for pos, day in enumerate(datetime_array) if day.weekday() >= 5]
def highlight_weekend(weekend_indices, ax, ymax):
    """Shade a 2-bar-wide green band on ax for every weekend.

    weekend_indices is assumed to hold consecutive Saturday/Sunday index pairs,
    so only every other entry (each Saturday) starts a shaded span.
    """
    for start in weekend_indices[::2]:
        ax.fill_between([start, start + 2], ymax, facecolor='green', alpha=.2)
# Two weeks starting Mon 2011-04-18; shade weekends on the daily-energy bars.
# NOTE(review): `datetime` must already be imported for this cell to run.
date_range=[datetime.datetime(2011,4,18)+datetime.timedelta(days=i) for i in range(14)]
weekend_indices=find_weekend_indices(date_range)
ax=df_1_day_energy.plot(kind='bar',rot=0);
ax.set_title('Energy Consumption Per Day Home II');
ax.set_ylabel('Energy (KWh)');
correct_labels(ax);
highlight_weekend(weekend_indices,ax,6);
# Same data as a stacked bar chart (taller shading to cover the stack).
ax=df_1_day_energy.plot(kind='bar',stacked=True,rot=0);
ax.set_title('Energy Consumption Per Day Home II');
ax.set_ylabel('Energy (KWh)');
correct_labels(ax);
highlight_weekend(weekend_indices,ax,7);
Now we try to break down the energy consumption into 6 hours slot and see if we can see some patterns amongst the same.
# 6-hour energy totals (kWh) computed from the time-shifted mains frame.
df_6_hours=df_mains_2.resample('6h',how='sum');
df_6_hours_energy=DataFrame(index=df_6_hours.index);
df_6_hours_energy['Mains_1_Energy']=Series(df_6_hours.Mains_1_Power/(1000*3600),index=df_6_hours.index)
df_6_hours_energy['Mains_2_Energy']=Series(df_6_hours.Mains_2_Power/(1000*3600),index=df_6_hours.index)
Statistics about Energy data downsampled to 6 hours.
df_6_hours_energy.describe()
# Split each day's four 6-hour slots (dawn/morning/dusk/night) into separate
# per-slot lists for both mains circuits.
days_mains_1=[]
dawn_mains_1=[]
morning_mains_1=[]
dusk_mains_1=[]
night_mains_1=[]
dawn_mains_2=[]
morning_mains_2=[]
dusk_mains_2=[]
night_mains_2=[]
# Python 2 integer division: one loop iteration per whole day (4 slots/day).
for i in range(len(df_6_hours_energy.Mains_1_Energy)/4):
    days_mains_1.append(df_6_hours_energy.index[4*i])
    dawn_mains_1.append(df_6_hours_energy.Mains_1_Energy[4*i])
    morning_mains_1.append(df_6_hours_energy.Mains_1_Energy[4*i+1])
    dusk_mains_1.append(df_6_hours_energy.Mains_1_Energy[4*i+2])
    night_mains_1.append(df_6_hours_energy.Mains_1_Energy[4*i+3])
    dawn_mains_2.append(df_6_hours_energy.Mains_2_Energy[4*i])
    morning_mains_2.append(df_6_hours_energy.Mains_2_Energy[4*i+1])
    dusk_mains_2.append(df_6_hours_energy.Mains_2_Energy[4*i+2])
    night_mains_2.append(df_6_hours_energy.Mains_2_Energy[4*i+3])
Plotting 6 hourly breakdown of energy consumption from Mains 1
# NOTE(review): bare figsize() only exists in IPython %pylab mode.
figsize(20,12)
# Grouped bars: the four 6-hour slots (1=dawn..4=night) per day for Mains 1.
df4=DataFrame({'1':dawn_mains_1,'2':morning_mains_1,'3':dusk_mains_1,'4':night_mains_1},index=days_mains_1)
ax=df4.plot(kind='bar',stacked=False,legend=False,rot=0);
patches, labels = ax.get_legend_handles_labels();
ax.legend(patches, labels, loc='upper right');
ax.set_title('6 hourly Breakdown of energy consumption Mains 1');
ax.set_ylabel('Energy (KWh)');
correct_labels(ax);
highlight_weekend(weekend_indices,ax,1);
#ax.fill_between(pd.date_range("2011-4-18","2011-4-24"),0,20)
# Same data, stacked.
df4=DataFrame({'1':dawn_mains_1,'2':morning_mains_1,'3':dusk_mains_1,'4':night_mains_1},index=days_mains_1)
ax=df4.plot(kind='bar',stacked=True,legend=False,rot=0);
#ax.fill_between(pd.date_range("2011-4-18","2011-4-24"),0,20);
patches, labels = ax.get_legend_handles_labels();
ax.legend(patches, labels, loc='upper right');
ax.set_title('6 hourly breakdown of energy consumption Mains 1');
ax.set_ylabel('Energy (KWh)');
correct_labels(ax);
highlight_weekend(weekend_indices,ax,1.6);
# Mains 2: grouped, then stacked.
df5=DataFrame({'1':dawn_mains_2,'2':morning_mains_2,'3':dusk_mains_2,'4':night_mains_2},index=days_mains_1)
ax=df5.plot(kind='bar',stacked=False,legend=False,rot=0);
patches, labels = ax.get_legend_handles_labels();
ax.legend(patches, labels, loc='upper right');
ax.set_title('6 hourly Breakdown of energy consumption Mains 2');
ax.set_ylabel('Energy (KWh)');
correct_labels(ax);
highlight_weekend(weekend_indices,ax,2.5);
ax=df5.plot(kind='bar',stacked=True,legend=False,rot=0);
patches, labels = ax.get_legend_handles_labels();
ax.legend(patches, labels, loc='upper right');
ax.set_title('6 hourly Breakdown of energy consumption Mains 2');
ax.set_ylabel('Energy (KWh)');
correct_labels(ax);
highlight_weekend(weekend_indices,ax,6);
# Load all nine appliance/circuit channels for House 2 and time the load.
start_time=time.time()
kitchen_data=np.loadtxt('house_2/channel_3.dat')
light_data=np.loadtxt('house_2/channel_4.dat')
stove_data=np.loadtxt('house_2/channel_5.dat')
microwave_data=np.loadtxt('house_2/channel_6.dat')
washer_dry_data=np.loadtxt('house_2/channel_7.dat')
kitchen_2_data=np.loadtxt('house_2/channel_8.dat')
refrigerator_data=np.loadtxt('house_2/channel_9.dat')
dishwasher_data=np.loadtxt('house_2/channel_10.dat')
disposal_data=np.loadtxt('house_2/channel_11.dat')
# Unused bounds left over from an earlier cleaning attempt (see commented lines).
upper=0
lower=len(kitchen_data)
#upper=np.where(kitchen_data[:,0]==1304282291.0)[0]
#lower=np.where(mains_1_data[:,0]>1303084800.0)[0][0]
# Column 1 is the power reading for each channel.
kitchen_power=kitchen_data[:,1]
light_power=light_data[:,1]
stove_power=stove_data[:,1]
microwave_power=microwave_data[:,1]
washer_dryer_power=washer_dry_data[:,1]
kitchen_2_power=kitchen_2_data[:,1]
refrigerator_power=refrigerator_data[:,1]
dishwasher_power=dishwasher_data[:,1]
disposal_power=disposal_data[:,1]
# All appliance channels are assumed to share the kitchen channel's timestamps
# (column 0) -- TODO confirm against the dataset documentation.
timestamp=kitchen_data[:,0]
timestamp_appliance_date=timestamp.astype('datetime64[s]')
end_time=time.time()
print "Time taken to load appliance data= "+str(end_time-start_time)+" seconds"
# One column per appliance, indexed by the shared timestamps.
df_appliances=DataFrame({'kitchen':kitchen_power,'light':light_power,'stove':stove_power,'microwave':microwave_power,\
'washer_dryer':washer_dryer_power,'kitchen_2':kitchen_2_power,'refrigerator':refrigerator_power,'dishwasher':dishwasher_power,\
'disposal':disposal_power},index=timestamp_appliance_date)
pd.set_option('display.precision', 2)
print df_appliances.describe().to_string()
# Restrict both frames to the same analysis window.
df_appliances=df_appliances['04-18-2011 06:00':'05-01-2011']
df_mains=df_mains['04-18-2011 06:00':'05-01-2011']
df_mains.index
df_appliances.index
Now we plot all the channels and describe their statistics
import matplotlib.pyplot as plt
# Plot every appliance channel as its own subplot and time the plotting.
start_time=time.time()
ax=df_appliances.plot(subplots=True,title="Appliance Power consumption",fontsize=10,figsize=(20,40))
plt.tight_layout()
end_time=time.time()
print "Time taken to plot appliance data= "+str(end_time-start_time)+" seconds"
# Hourly bar charts of every appliance for a single day (the 22nd).
df_appliances_22=df_appliances[df_appliances.index.day==22]
df_appliances_22_hour=df_appliances_22.resample('1h')
# NOTE(review): bare figsize() only exists in IPython %pylab mode.
figsize(20,40)
df_appliances_22_hour.plot(kind='bar',subplots=True);
Since there are two mains in the home we need to break down the individual appliance into different mains. Clearly,
Beyond this it is difficult to visually find the mains corresponding to the different appliances. We thus decide to iteratively remove the known components. It must be noted that the mains are at 1 Hz and the appliances are at a lower resolution, so we need to align the two. We do this by aligning the higher frequency mains to the lower appliance resolution: we downsample both the mains and all the appliances to a one-minute resolution, taking the mean of the values contained within each minute.
# Downsample appliance data to 1-minute means so it can be aligned with mains.
df_appliances_minute=df_appliances.resample('1Min',how='mean')
pd.set_option('display.precision', 2)
print df_appliances_minute.describe().to_string()
As a sanity check, we confirm that 19400 minutes correspond to about 14 days, thus our resampling was correct. We next align mains and appliance time series.
In this section we evaluate what happens when we downsample the data.
# Raw vs. downsampled refrigerator trace over one full day (the 21st).
plt.plot(df_appliances[df_appliances.index.day==21].index.to_pydatetime(),df_appliances[df_appliances.index.day==21].refrigerator.values,linewidth=4,label="Raw data");
plt.plot(df_appliances_minute[df_appliances_minute.index.day==21].index.to_pydatetime(),df_appliances_minute[df_appliances_minute.index.day==21].refrigerator.values,alpha=0.9,label="Downsampled data");
plt.legend();
Thus, we can see that by downsampling we reduce the transients which occur when the refrigerator turns on. This is required in NILM approaches. We further investigate the same by looking at a shorter time window.
# Same comparison over two shorter windows (10:00-12:00 and 05:00-07:00).
plt.plot(df_appliances[df_appliances.index.day==21].between_time('10:00','12:00').index.to_pydatetime(),df_appliances[df_appliances.index.day==21].between_time('10:00','12:00').refrigerator.values,linewidth=4,label="Raw data");
plt.plot(df_appliances_minute[df_appliances_minute.index.day==21].between_time('10:00','12:00').index.to_pydatetime(),df_appliances_minute[df_appliances_minute.index.day==21].refrigerator.between_time('10:00','12:00').values,alpha=0.9,label="Downsampled data");
plt.legend();
plt.plot(df_appliances[df_appliances.index.day==21].between_time('05:00','07:00').index.to_pydatetime(),df_appliances[df_appliances.index.day==21].between_time('05:00','07:00').refrigerator.values,linewidth=4,label="Raw data");
plt.plot(df_appliances_minute[df_appliances_minute.index.day==21].between_time('05:00','07:00').index.to_pydatetime(),df_appliances_minute[df_appliances_minute.index.day==21].refrigerator.between_time('05:00','07:00').values,alpha=0.9,label="Downsampled data");
plt.legend();
From the above diagrams we can see that we essentially don't lose any important aspect by downsampling. However, we gain a lot by denoising and also by reducing the amount of data which needs to be processed.
Now we quantify the effect of downsampling by the following statistics:
# Boxplot comparison of raw vs. downsampled refrigerator power.
plt.boxplot([df_appliances.refrigerator.values,df_appliances_minute.refrigerator.values])
# NOTE(review): `pylab` is never imported here; this works only in %pylab mode.
pylab.xticks([1, 2], ['Raw Data', 'Downsampled Data']);
print "Standard Deviation of raw data ",df_appliances.refrigerator.std()
print "Standard Deviation of downsampled data",df_appliances_minute.refrigerator.std()
# Count of transient spikes (>500 W) before and after downsampling.
print "Number of points in raw data when power was more than 500W :",len(np.where(df_appliances.refrigerator.values>500)[0])
print "Number of points in downsampled data when power was more than 500W:",len(np.where(df_appliances_minute.refrigerator.values>500)[0])
# Downsample the mains to the same 1-minute resolution for alignment.
df_mains_minute=df_mains.resample('1Min',how='mean')
df_mains_minute.describe()
#print np.where(df_mains_minute.index==df_appliances_minute.index[0])
print df_mains_minute.index
print df_mains_minute.describe()
print df_appliances_minute.index
We now plot the lower resolution mains 1 and iteratively attempt to take out appliances.
#figsize(20,15)
# Downsampled Mains 1 trace before any subtraction.
ax=df_mains_minute.Mains_1_Power.plot(title='1 min downsampled Power Mains 1');
ax.set_ylabel("Power (W)");
Removing kitchen_2 from Mains 1
# Sanity check of a 30-minute window at both resolutions before subtracting.
# NOTE(review): bare figsize() only exists in IPython %pylab mode.
figsize(15,15)
fig, axes = plt.subplots(nrows=4)
df_appliances_minute[df_appliances_minute.index.day==22].between_time("00:20","00:50").refrigerator.plot(ax=axes[0])
df_mains_minute[df_mains_minute.index.day==22].between_time("00:20","00:50").plot(ax=axes[1]);
df_mains[df_mains.index.day==22].between_time("00:20","00:50").plot(ax=axes[2]);
df_appliances[df_appliances.index.day==22].between_time("00:20","00:50").refrigerator.plot(ax=axes[3]);
#df_mains_minute.at_time("00:26")
#df_mains.at_time("00:26")
# Subtract kitchen_2 from Mains 1; clamp negatives (meter mismatch) to zero.
temp_1=df_mains_minute.Mains_1_Power-df_appliances_minute.kitchen_2
temp_1[temp_1<0.0]=0.0
df_mains_minute_minus_kitchen_2=df_mains_minute.copy()
df_mains_minute_minus_kitchen_2.Mains_1_Power=temp_1
print "Before\n\n",df_mains_minute.describe()
print "\nAfter\n\n",df_mains_minute_minus_kitchen_2.describe()
ax=df_mains_minute_minus_kitchen_2.Mains_1_Power.plot(title='Mains 1 Power Minus Kitchen 2')
ax.set_ylabel('Power');
Now removing Kitchen 1 from mains 1
# Subtract the kitchen channel from the Mains 1 residual; clamp negatives.
temp_2=df_mains_minute_minus_kitchen_2.Mains_1_Power-df_appliances_minute.kitchen
temp_2[temp_2<0.0]=0.0
df_mains_minute_minus_kitchen_2_1=df_mains_minute_minus_kitchen_2.copy()
df_mains_minute_minus_kitchen_2_1.Mains_1_Power=temp_2
print "Before\n\n",df_mains_minute_minus_kitchen_2.describe()
print "\nAfter\n\n",df_mains_minute_minus_kitchen_2_1.describe()
ax=df_mains_minute_minus_kitchen_2_1.Mains_1_Power.plot(title='Mains 1 Power Minus Kitchen 2 and Kitchen 1')
ax.set_ylabel('Power');
Now we observe that Dishwasher was used every 3rd day starting from 19th and power consumption was about 1200+ W. Thus, we next remove the dishwasher component from mains 1
# Subtract the dishwasher from the Mains 1 residual; clamp negatives.
temp_3=df_mains_minute_minus_kitchen_2_1.Mains_1_Power-df_appliances_minute.dishwasher
temp_3[temp_3<0.0]=0.0
df_mains_minute_minus_kitchen_dishwasher=df_mains_minute_minus_kitchen_2_1.copy()
df_mains_minute_minus_kitchen_dishwasher.Mains_1_Power=temp_3
print "Before\n\n",df_mains_minute_minus_kitchen_2_1.describe()
print "\nAfter\n\n",df_mains_minute_minus_kitchen_dishwasher.describe()
ax=df_mains_minute_minus_kitchen_dishwasher.Mains_1_Power.plot(title='Mains 1 Power Minus Kitchen and Dishwasher')
ax.set_ylabel('Power');
Removing stove from Mains 1
# Subtract the stove from the Mains 1 residual; clamp negatives.
temp_4=df_mains_minute_minus_kitchen_dishwasher.Mains_1_Power-df_appliances_minute.stove
temp_4[temp_4<0.0]=0.0
df_mains_minute_minus_kitchen_dishwasher_stove=df_mains_minute_minus_kitchen_dishwasher.copy()
df_mains_minute_minus_kitchen_dishwasher_stove.Mains_1_Power=temp_4
print "Before\n\n",df_mains_minute_minus_kitchen_dishwasher.describe()
print "\nAfter\n\n",df_mains_minute_minus_kitchen_dishwasher_stove.describe()
ax=df_mains_minute_minus_kitchen_dishwasher_stove.Mains_1_Power.plot(title='Mains 1 Power Minus Kitchen, Dishwasher and Stove')
ax.set_ylabel('Power');
We next observe that none of the other appliances can be extracted visually from Mains 1. So we start removing appliances iteratively from Mains 2. From the next plot we can see that there is a slight difference in power seen by the mains and the appliance level monitor, hence there seems to be a need to do this calibration to ensure that we have better results. Moreover, this is an aspect I think no one has yet highlighted in their work.
# Subtract the refrigerator from Mains 2; clamp negatives to zero.
temp_5=df_mains_minute_minus_kitchen_dishwasher_stove.Mains_2_Power-df_appliances_minute.refrigerator
temp_5[temp_5<0.0]=0.0
plt.subplot(3,1,1)
df_appliances_minute.refrigerator.plot(title='Refrigerator')
plt.subplot(3,1,2)
df_mains_minute.Mains_2_Power.plot(title='Mains 2')
plt.subplot(3,1,3)
temp_5.plot(title='Mains 2 after removing refrigerator');
plt.tight_layout()
df_mains_minute_minus_kitchen_dishwasher_stove_ref=df_mains_minute_minus_kitchen_dishwasher_stove.copy()
df_mains_minute_minus_kitchen_dishwasher_stove_ref.Mains_2_Power=temp_5
print "Before\n\n",df_mains_minute_minus_kitchen_dishwasher_stove.describe()
print "\nAfter\n\n",df_mains_minute_minus_kitchen_dishwasher_stove_ref.describe()
ax=df_mains_minute_minus_kitchen_dishwasher_stove_ref.Mains_2_Power.plot(title='Mains 2 Power Minus Refrigerator')
ax.set_ylabel('Power');
Since microwave is taking more power than residual power in Mains 1, it has to belong to Mains 2. Next, removing Microwave from Mains 2.
# Subtract the microwave from the Mains 2 residual; clamp negatives.
temp_6=df_mains_minute_minus_kitchen_dishwasher_stove_ref.Mains_2_Power-df_appliances_minute.microwave
temp_6[temp_6<0.0]=0.0
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro=df_mains_minute_minus_kitchen_dishwasher_stove_ref.copy()
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro.Mains_2_Power=temp_6
print "Before\n\n",df_mains_minute_minus_kitchen_dishwasher_stove_ref.describe()
print "\nAfter\n\n",df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro.describe()
ax=df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro.Mains_2_Power.plot(title='Mains 2 Power Minus Refrigerator and Micro')
ax.set_ylabel('Power');
An interesting thing to note is the correlation between Disposal and Dishwasher both of which occur on the same days. Next, we iteratively start extracting out appliances from Mains 2.
Removing lighting from Mains 2.
# Subtract lighting from the Mains 2 residual; clamp negatives.
temp_7=df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro.Mains_2_Power-df_appliances_minute.light
temp_7[temp_7<0.0]=0.0
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light=df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro.copy()
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.Mains_2_Power=temp_7
print "Before\n\n",df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro.describe()
print "\nAfter\n\n",df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.describe()
# Final residual vs. the original Mains 2 trace.
fig, axes = plt.subplots(nrows=2)
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.Mains_2_Power.plot(title='Mains 2 Residual Power',ax=axes[0])
axes[0].set_ylabel('Power');
df_mains_minute.Mains_2_Power.plot(title='Mains 2 Power',ax=axes[1])
axes[1].set_ylabel('Power');
Thus, we can see that both for Mains 1 and Mains 2 there is still a lot of unaccounted power. This is due to miscalibration between the appliance level loads and also due to the absence of complete information. This is an important aspect to address. Next, we draw the boxplot showing unaccounted power.
# Overlay the residual, the raw Mains 2 trace, and the Mains 2 appliances for
# one day (the 18th) to visualise the miscalibration between meters.
fig=plt.figure()
axes=plt.gca()
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light[df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.index.day==18].Mains_2_Power.plot(title='Mains 2 Residual Power',ax=axes,label='Residual Power')
axes.set_ylabel('Power');
df_mains_minute[df_mains_minute.index.day==18].Mains_2_Power.plot(title='Mains 2 Power',ax=axes)
axes.set_ylabel('Power');
df_appliances_minute[df_appliances_minute.index.day==18].refrigerator.plot(ax=axes)
df_appliances_minute[df_appliances_minute.index.day==18].microwave.plot(ax=axes)
df_appliances_minute[df_appliances_minute.index.day==18].light.plot(ax=axes,linewidth=4)
plt.title('The Case of Metering Equipment Miscalibration');
plt.legend();
# Same overlay zoomed into 03:00-04:00 on the 19th.
fig=plt.figure()
axes=plt.gca()
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light[df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.index.day==19].between_time('03:00','04:00').Mains_2_Power.plot(title='Mains 2 Residual Power',ax=axes,label='Residual Power')
axes.set_ylabel('Power');
df_mains_minute[df_mains_minute.index.day==19].between_time('03:00','04:00').Mains_2_Power.plot(title='Mains 2 Power',ax=axes)
axes.set_ylabel('Power');
df_appliances_minute[df_appliances_minute.index.day==19].between_time('03:00','04:00').refrigerator.plot(ax=axes)
df_appliances_minute[df_appliances_minute.index.day==19].between_time('03:00','04:00').microwave.plot(ax=axes)
df_appliances_minute[df_appliances_minute.index.day==19].between_time('03:00','04:00').light.plot(ax=axes,linewidth=4)
# NOTE(review): this line re-plots the residual a second time; `fac` is unused.
fac=df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light[df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.index.day==19].between_time('03:00','04:00').Mains_2_Power.plot(title='Mains 2 Residual Power',ax=axes,label='Residual Power')
plt.title('The Case of Metering Equipment Miscalibration II');
plt.legend();
Thus, we can account for this mis calibration and this is bound to produce more accurate results, since the difference is very significant. Also there is a possibility of some major load not being monitored, since the load around 6 PM cannot be attributed to any given appliance. It could also be a case that at meter level we are recording Apparent Power whereas at appliance level, Real power is getting recorded. We further confirm this by investigating further when some other appliances were on.
# Zoom into 16:55-17:05 on the 19th while other appliances are active.
fig=plt.figure()
axes=plt.gca()
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light[df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.index.day==19].between_time('16:55','17:05').Mains_2_Power.plot(title='Mains 2 Residual Power',ax=axes,label='Residual Power')
axes.set_ylabel('Power');
df_mains_minute[df_mains_minute.index.day==19].between_time('16:55','17:05').Mains_2_Power.plot(title='Mains 2 Power',ax=axes)
axes.set_ylabel('Power');
df_appliances_minute[df_appliances_minute.index.day==19].between_time('16:55','17:05').refrigerator.plot(ax=axes)
df_appliances_minute[df_appliances_minute.index.day==19].between_time('16:55','17:05').microwave.plot(ax=axes)
df_appliances_minute[df_appliances_minute.index.day==19].between_time('16:55','17:05').light.plot(ax=axes,linewidth=4)
plt.title('The Case of Metering Equipment Miscalibration II');
plt.legend();
# Boxplot of the unaccounted (residual) power on both mains.
plt.figure()
plt.title("Unaccounted Power")
plt.ylabel("Power (W)")
df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.boxplot();
Mains 1
# Pie chart: mean power share of each known Mains 1 appliance plus the
# unaccounted remainder.
labels = 'Kitchen 1', 'Kitchen 2', 'Dishwasher', 'Stove','Unaccounted'
kitchen_mean,kitchen_2_mean,dishwasher_mean,stove_mean=df_appliances_minute.kitchen.mean(),\
df_appliances_minute.kitchen_2.mean(),df_appliances_minute.dishwasher.mean(),df_appliances_minute.stove.mean()
# Whatever mean mains power the appliance channels do not explain.
unaccounted_mean=df_mains_minute.Mains_1_Power.mean()-(kitchen_mean+kitchen_2_mean+dishwasher_mean+stove_mean)
fracs = [kitchen_mean,kitchen_2_mean,dishwasher_mean,stove_mean,unaccounted_mean]
explode=(0, 0, 0, 0,0)
# NOTE(review): was plt.figsize(7,7), which is not a pyplot function.
plt.figure(figsize=(7, 7))
plt.title('Power Breakdown by appliance Mains 1');
plt.pie(fracs, explode=explode, labels=labels,autopct='%1.1f%%', shadow=True);
# Pie chart: mean power share of each known Mains 2 appliance plus the
# unaccounted remainder.
labels = 'Refrigerator', 'Microwave', 'Lighting','Unaccounted Power'
refrigerator_mean,microwave_mean,lighting_mean=df_appliances_minute.refrigerator.mean(),\
df_appliances_minute.microwave.mean(),df_appliances_minute.light.mean()
unaccounted_mean=df_mains_minute.Mains_2_Power.mean()-(refrigerator_mean+microwave_mean+lighting_mean)
fracs = [refrigerator_mean,microwave_mean,lighting_mean,unaccounted_mean]
explode=(0, 0, 0,0)
# NOTE(review): was plt.figsize(7,7), which is not a pyplot function.
plt.figure(figsize=(7, 7))
plt.title('Power Breakdown by appliance Mains 2');
plt.pie(fracs, explode=explode, labels=labels,autopct='%1.1f%%', shadow=True);
Thus, we can see that in both the mains circuits about 1/3 of total power cannot be attributed to any appliance.
# Pie chart: how total mean power splits between the two mains circuits.
labels = 'Mains 1','Mains 2'
fracs = [df_mains_minute.Mains_1_Power.mean(),df_mains_minute.Mains_2_Power.mean()]
explode=(0, 0)
# NOTE(review): was plt.figsize(7,7), which is not a pyplot function.
plt.figure(figsize=(7, 7))
plt.title('Power Breakdown by Mains');
plt.pie(fracs, explode=explode, labels=labels,autopct='%1.1f%%', shadow=True);
Remaining load is unaccounted for in the analysis. We now have 2 options:
Option 1 is more realistic and Option 2 is more ideal. We shall be considering the ideal case through the remaining analysis. Thus, we need to filter out the remaining data.
# "Ideal" mains: the sum of the known appliance channels per circuit, i.e.
# mains with the unaccounted load filtered out.
filtered_mains_1_power=df_appliances_minute.kitchen+df_appliances_minute.kitchen_2+df_appliances_minute.stove+\
df_appliances_minute.dishwasher
filtered_mains_2_power=df_appliances_minute.refrigerator+df_appliances_minute.light+df_appliances_minute.microwave
df_filtered_mains=pd.DataFrame({'Mains_1_Power':filtered_mains_1_power,'Mains_2_Power':filtered_mains_2_power},\
index=df_mains_minute.index)
df_filtered_mains.describe()
Plotting Disaggregated consumption Mains 1
# Filtered Mains 2 vs. its constituent appliances over 03:00-04:00 on the 19th.
fig=plt.figure()
axes=plt.gca()
df_filtered_mains[df_filtered_mains.index.day==19].between_time('03:00','04:00').Mains_2_Power.plot(title='Mains 2 Residual Power',ax=axes,label='Mains 2')
axes.set_ylabel('Power');
df_appliances_minute[df_appliances_minute.index.day==19].between_time('03:00','04:00').refrigerator.plot(ax=axes)
df_appliances_minute[df_appliances_minute.index.day==19].between_time('03:00','04:00').microwave.plot(ax=axes)
df_appliances_minute[df_appliances_minute.index.day==19].between_time('03:00','04:00').light.plot(ax=axes,linewidth=4)
#fac=df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light[df_mains_minute_minus_kitchen_dishwasher_stove_ref_micro_light.index.day==19].between_time('03:00','04:00').Mains_2_Power.plot(title='Mains 2 Residual Power',ax=axes,label='Residual Power')
plt.title('The Case of Metering Equipment Miscalibration II');
plt.legend();
# Stacked area chart of the Mains 1 appliance-level breakdown over time.
# NOTE(review): Rectangle and legend were previously bare names that only exist
# in IPython %pylab mode; import/qualify them explicitly.
from matplotlib.patches import Rectangle
python_datetime=df_filtered_mains.index.to_pydatetime()
# NOTE(review): was plt.figsize(20,15), which is not a pyplot function.
plt.figure(figsize=(20, 15))
plt.title('Actual Disaggregated Breakdown Mains 1');
plt.xlabel('Time');
plt.ylabel('Power (W)');
# Cumulative stack boundaries: kitchen, then +stove, then +dishwasher; the
# remainder up to the filtered mains total is kitchen_2.
y_1=df_appliances_minute.kitchen+df_appliances_minute.stove
y_2=y_1+df_appliances_minute.dishwasher
plt.fill_between(python_datetime,df_appliances_minute.kitchen,np.zeros(len(df_appliances_minute.kitchen)),color="yellow")
plt.fill_between(python_datetime,y_1,df_appliances_minute.kitchen,color='blue',label='Test')
plt.fill_between(python_datetime,y_2,y_1,color='red',alpha=.6)
plt.fill_between(python_datetime,y_2,df_filtered_mains.Mains_1_Power,color='green',alpha=0.4)
# Proxy artists for the legend (fill_between patches are not picked up
# automatically by legend()).
p = Rectangle((0, 0), 1, 1, fc="y")
p1=Rectangle((0, 0), 1, 1, fc="b")
p2=Rectangle((0, 0), 1, 1, fc="r")
p3=Rectangle((0, 0), 1, 1, fc="g")
plt.legend([p,p1,p2,p3], ["Kitchen","Stove","Dishwasher","Kitchen 2"]);
In this section we describe how we divide the data (both the mains data and the appliance level) into training and test dataset. Since we have data worth 2 weeks, we decide to use the first week of data for training and the second week of data for testing. This shall ensure that factors such as weekday/ weekend don't spoil the analysis.
# Train on the first week (through 2011-04-24), test on the second week.
df_train=df_mains_minute[:'2011-4-24']
df_test=df_mains_minute['2011-4-25':]
df_appliances_minute_train=df_appliances_minute[:'2011-4-24']
df_appliances_minute_test=df_appliances_minute['2011-4-25':]
df_filtered_mains_train=df_filtered_mains[:'2011-4-24']
df_filtered_mains_test=df_filtered_mains['2011-4-25':]
# NOTE(review): the next two lines duplicate the appliance split above.
df_appliances_minute_train=df_appliances_minute[:'2011-4-24']
df_appliances_minute_test=df_appliances_minute['2011-4-25':]
We also do forward filling ensuring that we don't have NA in the dataset. This is required as the clustering algorithms cannot handle missing data. CONFIRM: If this is the right thing to do.
# Forward-fill NAs in place so the clustering step sees no missing values.
df_appliances_minute_train.fillna(method='pad',inplace=True)
df_appliances_minute_test.fillna(method='pad',inplace=True)
df_filtered_mains_train.fillna(method='pad',inplace=True)
df_filtered_mains_test.fillna(method='pad',inplace=True)
Plotting the mains test and train data
# 2x2 grid: train/test split of the filtered mains power for both circuits.
# NOTE(review): the original used a redundant chained assignment
# (fig,axes=fig, axes = plt.subplots(...)); a single assignment suffices.
fig, axes = plt.subplots(nrows=2, ncols=2)
df_filtered_mains_train.Mains_1_Power.plot(ax=axes[0,0]);
axes[0,0].set_title('Mains 1 Train Data');
df_filtered_mains_test.Mains_1_Power.plot(ax=axes[1,0]);
axes[1,0].set_title('Mains 1 Test Data');
df_filtered_mains_train.Mains_2_Power.plot(ax=axes[0,1]);
axes[0,1].set_title('Mains 2 Train Data');
df_filtered_mains_test.Mains_2_Power.plot(ax=axes[1,1]);
axes[1,1].set_title('Mains 2 Test Data');
We can see that the training set is a good representative of the test dataset.
Finding different states for each appliance and the corresponding power consumption using various clustering techniques. Firstly, we start with stove. There are several reasons behind choosing a clustering algorithm, some of them are mentioned at http://scikit-learn.org/stable/modules/clustering.html
from sklearn.cluster import MiniBatchKMeans, KMeans
import time
# NOTE(review): plt.figsize is not a pyplot function (see the setup cell note).
plt.figsize(15,8)
times=df_appliances_minute_train.index.to_pydatetime()
# Reshape each appliance series into an (n, 1) column array as expected by
# the sklearn clustering estimators.
raw_data={}
for key in df_appliances_minute_train:
    raw_data[key]=df_appliances_minute_train[key].values
    length=len(raw_data[key])
    raw_data[key]=raw_data[key].reshape(length,1)
Also saving the data into .arff files so that we can also cross verify from Weka.
# NOTE(review): to_csv writes plain CSV; the .arff extension alone does not
# produce a valid Weka ARFF file (no @relation/@attribute header).
for key in df_appliances_minute_train:
    df_appliances_minute_train[key].to_csv(key+".arff",index_label=False,index=False)
Question: How to choose the batch size?
# Mini-batch size shared by all apply_kmeans calls.
batch_size=1000
def apply_kmeans(n_clusters, n_init,X,init=None):
    """Fit both full KMeans and MiniBatchKMeans on X and return their results.

    Returns a list: [kmeans fit time, mini-batch fit time, labels for each
    variant, cluster centers for each variant, unique labels for each variant,
    and the inertia of each variant].

    NOTE(review): the *value* of `init` is never used -- both branches fit with
    k-means++ initialisation (sklearn's default); the parameter only selects
    whether init='k-means++' is passed explicitly.  Callers pass "kmeans++" as
    a truthy flag.
    """
    if init is None:
        k_means = KMeans(n_clusters=n_clusters, n_init=n_init)
    else:
        k_means=KMeans(init='k-means++',n_clusters=n_clusters, n_init=n_init)
    t0 = time.time()
    k_means.fit(X)
    t_batch = time.time() - t0
    k_means_labels = k_means.labels_
    k_means_cluster_centers = k_means.cluster_centers_
    k_means_labels_unique = np.unique(k_means_labels)
    k_means_inertia=k_means.inertia_
    # Mini-batch variant: trades some accuracy for speed on large X.
    mbk = MiniBatchKMeans(init='k-means++', n_clusters=n_clusters, batch_size=batch_size,
                          n_init=n_init, max_no_improvement=10, verbose=0)
    t0 = time.time()
    mbk.fit(X)
    t_mini_batch = time.time() - t0
    mbk_means_labels = mbk.labels_
    mbk_means_cluster_centers = mbk.cluster_centers_
    mbk_means_labels_unique = np.unique(mbk_means_labels)
    mbk_inertia=mbk.inertia_
    return [t_batch, t_mini_batch, k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers,\
            k_means_labels_unique,mbk_means_labels_unique, k_means_inertia, mbk_inertia]
def plot_cluster_assignments(X,k_means_labels, mbk_means_labels,k_means_cluster_centers,mbk_means_cluster_centers,n_clusters,appliance_name):
    """Plot, side by side, the KMeans and MiniBatchKMeans cluster assignments
    for one appliance, and print each cluster's centroid and the fraction of
    data points assigned to it.

    Parameters:
        X: (n_samples, 1) array of power readings.
        k_means_labels / mbk_means_labels: per-sample cluster labels.
        k_means_cluster_centers / mbk_means_cluster_centers: (K, 1) centroids.
        n_clusters: K; at most 3 (limited by the colors/markers lists below).
        appliance_name: used in the plot titles.
    """
    colors = ['#4EACC5', '#FF9C34', '#4E9A06']
    markers=['o','*','.']
    x_temp=np.arange(len(X))
    plt.subplot(2,2,1)
    plt.rcParams["font.size"]=10
    print "\nKMeans Analysis\n"
    print "-"*80
    for k, col in zip(range(n_clusters), colors):
        my_members = k_means_labels == k
        cluster_center = k_means_cluster_centers[k]
        plt.ylabel('Power (W)');
        # Scatter the members of cluster k and mark its centroid with a line.
        plt.plot(x_temp[my_members],X[my_members, 0],markers[k],markersize=10,markerfacecolor=col)
        plt.axhline(k_means_cluster_centers[k],linewidth=3,color=col)
        print "State %d Centroid= %0.4f, Fraction of datapoints= %0.4f" %(k,cluster_center,sum(my_members)*1.0/np.size(X))
    plt.title('KMeans Cluster Assignment for '+appliance_name+' for K='+str(n_clusters))
    plt.subplot(2,2,2)
    print "\nMini Batch KMeans Analysis\n"
    print "-"*80
    for k, col in zip(range(n_clusters), colors):
        my_members = mbk_means_labels == k
        cluster_center = mbk_means_cluster_centers[k]
        plt.ylabel('Power (W)');
        plt.plot(x_temp[my_members],X[my_members, 0],markers[k],markersize=10,markerfacecolor=col)
        plt.axhline(mbk_means_cluster_centers[k],linewidth=3,color=col)
        print "State %d Centroid= %0.4f, Fraction of datapoints= %0.4f" %(k,cluster_center,sum(my_members)*1.0/np.size(X))
    plt.title('Mini Batch KMeans Cluster Assignment for '+appliance_name+' for K='+str(n_clusters))
    print "-"*80
# Cluster the stove series into K=2 states with both KMeans variants.
[t_batch, t_mini_batch, k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers,
k_means_labels_unique,mbk_means_labels_unique, k_means_inertia, mbk_inertia]=apply_kmeans(2,10,raw_data["stove"],"kmeans++")
plot_cluster_assignments(raw_data["stove"],k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers\
,2,"Stove")
print "Time taken for Kmeans clustering : \n",t_batch
print "Inertia of KMeans cluster assignment: \n",k_means_inertia
print "Time taken for Mini Batch Kmeans clustering : \n",t_mini_batch
print "Inertia of Mini Batch KMeans cluster assignment: \n",mbk_inertia
# Re-index the labels so state numbers increase with centroid power
# (state 0 = lowest-power cluster).  NOTE(review): `sort` here is a pylab
# global (np.sort); this cell assumes `from pylab import *` was run.
flattened=k_means_cluster_centers.flatten()
sorted_list=sort(flattened)
labels_stove=[]
for label in k_means_labels:
    labels_stove.append(sorted_list.tolist().index(flattened[label]))
labels_stove=np.array(labels_stove)
# Same procedure for the kitchen circuit, K=2 states.
[t_batch, t_mini_batch, k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers,
k_means_labels_unique,mbk_means_labels_unique, k_means_inertia, mbk_inertia]=apply_kmeans(2,10,raw_data["kitchen"],"kmeans++")
plot_cluster_assignments(raw_data["kitchen"],k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers\
,2,"Kitchen")
print "Time taken for Kmeans clustering : \n",t_batch
print "Inertia of KMeans cluster assignment: \n",k_means_inertia
print "Time taken for Mini Batch Kmeans clustering : \n",t_mini_batch
print "Inertia of Mini Batch KMeans cluster assignment: \n",mbk_inertia
# Relabel so state numbers follow ascending centroid power.
flattened=k_means_cluster_centers.flatten()
sorted_list=sort(flattened)
labels_kitchen=[]
for label in k_means_labels:
    labels_kitchen.append(sorted_list.tolist().index(flattened[label]))
labels_kitchen=np.array(labels_kitchen)
# Same procedure for the second kitchen circuit, K=3 states.
[t_batch, t_mini_batch, k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers,
k_means_labels_unique,mbk_means_labels_unique, k_means_inertia, mbk_inertia]=apply_kmeans(3,10,raw_data["kitchen_2"],"kmeans++")
plot_cluster_assignments(raw_data["kitchen_2"],k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers\
,3,"Kitchen 2")
print "Time taken for Kmeans clustering : \n",t_batch
print "Inertia of KMeans cluster assignment: \n",k_means_inertia
print "Time taken for Mini Batch Kmeans clustering : \n",t_mini_batch
print "Inertia of Mini Batch KMeans cluster assignment: \n",mbk_inertia
# Relabel so state numbers follow ascending centroid power.
flattened=k_means_cluster_centers.flatten()
sorted_list=sort(flattened)
labels_kitchen_2=[]
for label in k_means_labels:
    labels_kitchen_2.append(sorted_list.tolist().index(flattened[label]))
labels_kitchen_2=np.array(labels_kitchen_2)
# Same procedure for the dishwasher, K=3 states.
[t_batch, t_mini_batch, k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers,
k_means_labels_unique,mbk_means_labels_unique, k_means_inertia, mbk_inertia]=apply_kmeans(3,10,raw_data["dishwasher"],"kmeans++")
plot_cluster_assignments(raw_data["dishwasher"],k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers\
,3,"Dishwasher")
print "Time taken for Kmeans clustering : \n",t_batch
print "Inertia of KMeans cluster assignment: \n",k_means_inertia
print "Time taken for Mini Batch Kmeans clustering : \n",t_mini_batch
print "Inertia of Mini Batch KMeans cluster assignment: \n",mbk_inertia
# Relabel so state numbers follow ascending centroid power.
flattened=k_means_cluster_centers.flatten()
sorted_list=sort(flattened)
labels_dishwasher=[]
for label in k_means_labels:
    labels_dishwasher.append(sorted_list.tolist().index(flattened[label]))
labels_dishwasher=np.array(labels_dishwasher)
# Same procedure for the refrigerator, K=3 states.
[t_batch, t_mini_batch, k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers,
k_means_labels_unique,mbk_means_labels_unique, k_means_inertia, mbk_inertia]=apply_kmeans(3,10,raw_data["refrigerator"],"kmeans++")
plot_cluster_assignments(raw_data["refrigerator"],k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers\
,3,"Refrigerator")
print "Time taken for Kmeans clustering : \n",t_batch
print "Inertia of KMeans cluster assignment: \n",k_means_inertia
print "Time taken for Mini Batch Kmeans clustering : \n",t_mini_batch
print "Inertia of Mini Batch KMeans cluster assignment: \n",mbk_inertia
# Relabel so state numbers follow ascending centroid power.
flattened=k_means_cluster_centers.flatten()
sorted_list=sort(flattened)
labels_refrigerator=[]
for label in k_means_labels:
    labels_refrigerator.append(sorted_list.tolist().index(flattened[label]))
labels_refrigerator=np.array(labels_refrigerator)
# Same procedure for the microwave, K=3 states.
[t_batch, t_mini_batch, k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers,
k_means_labels_unique,mbk_means_labels_unique, k_means_inertia, mbk_inertia]=apply_kmeans(3,10,raw_data["microwave"],"kmeans++")
plot_cluster_assignments(raw_data["microwave"],k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers\
,3,"Microwave")
print "Time taken for Kmeans clustering : \n",t_batch
print "Inertia of KMeans cluster assignment: \n",k_means_inertia
print "Time taken for Mini Batch Kmeans clustering : \n",t_mini_batch
print "Inertia of Mini Batch KMeans cluster assignment: \n",mbk_inertia
# Relabel so state numbers follow ascending centroid power.
flattened=k_means_cluster_centers.flatten()
sorted_list=sort(flattened)
labels_microwave=[]
for label in k_means_labels:
    labels_microwave.append(sorted_list.tolist().index(flattened[label]))
labels_microwave=np.array(labels_microwave)
# Same procedure for the lighting circuit, K=3 states.
[t_batch, t_mini_batch, k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers,
k_means_labels_unique,mbk_means_labels_unique, k_means_inertia, mbk_inertia]=apply_kmeans(3,10,raw_data["light"],"kmeans++")
plot_cluster_assignments(raw_data["light"],k_means_labels, mbk_means_labels, k_means_cluster_centers, mbk_means_cluster_centers\
,3,"Light")
print "Time taken for Kmeans clustering : \n",t_batch
print "Inertia of KMeans cluster assignment: \n",k_means_inertia
print "Time taken for Mini Batch Kmeans clustering : \n",t_mini_batch
print "Inertia of Mini Batch KMeans cluster assignment: \n",mbk_inertia
# Relabel so state numbers follow ascending centroid power.
flattened=k_means_cluster_centers.flatten()
sorted_list=sort(flattened)
labels_light=[]
for label in k_means_labels:
    labels_light.append(sorted_list.tolist().index(flattened[label]))
labels_light=np.array(labels_light)
from scipy.spatial import distance
from sklearn.cluster import DBSCAN
We also tried a different distance metric, Manhattan distance, in Weka, which led to absurd results, with mostly a single class being predicted.
Tried to do DBScan in scikit learn, but it took too much memory and time and hung the system
DBSCAN in Weka took about 30 s and resulted in all instances being classified into one cluster. This may be due to the nature of the data, in which a single class forms the majority. This may be something to look into: when dealing with such data we are mostly going to encounter datasets that are heavy in a single class. This is again something which, to my knowledge, has not been discussed in the NILM literature before.
SOM based clustering took about 30s per appliance, but yielded much better results, which are shown below. EM took about 42 seconds and the results are not too impressive.
# Display the SOM clustering results (exported from Weka) as images.
from IPython.core.display import Image
Image(filename='som_ref.png')
Image(filename='som_stove.png')
Thus, we obtain the following states from cluster analysis (results are from KMeans). In the above results we also saw that most of the appliances are mostly in their off states most of the time. This is an important aspect and must be exploited.
# Per-appliance state power levels in watts, taken from the KMeans centroids
# above; the first entry of each list is the (near-)off state.
kitchen=[5,727]
lighting=[9,96,155]
stove=[0,374]
micro=[8,852,1677]
kitchen_2=[1,206,1035]
ref=[7,162,423]
dish=[0,250,1186]
Using these cluster assignments we label the state assignment for the test data. We do this by assigning the state whose power is closest to the observed power.
def find_nearest_index(array, value):
    """Return the index of the element of `array` closest to `value`.

    Ties are broken in favour of the lowest index (argmin semantics).
    """
    distances = np.abs(array - value)
    return distances.argmin()
def find_labels(appliance_power_consumption_list, observed_power):
    """Map every observed power reading to its nearest appliance state.

    Parameters:
        appliance_power_consumption_list: per-state power levels (W).
        observed_power: sequence/Series of observed power readings.

    Returns a float array of state indices, one per observation (the dtype
    matches the original np.zeros-based implementation).
    """
    states = np.asarray(appliance_power_consumption_list, dtype=float)
    observed = np.asarray(observed_power, dtype=float)
    # Pairwise |state - observation|; argmin over the state axis picks the
    # closest state for every sample at once, replacing the per-sample
    # Python loop of the original implementation.
    labels = np.abs(states[:, None] - observed[None, :]).argmin(axis=0)
    return labels.astype(float)
# Ground-truth state labels for the test window, one array per appliance.
labels_kitchen=find_labels(kitchen, df_appliances_minute_test.kitchen)
labels_kitchen_2=find_labels(kitchen_2, df_appliances_minute_test.kitchen_2)
labels_refrigerator=find_labels(ref, df_appliances_minute_test.refrigerator)
labels_microwave=find_labels(micro, df_appliances_minute_test.microwave)
labels_stove=find_labels(stove, df_appliances_minute_test.stove)
labels_light=find_labels(lighting, df_appliances_minute_test.light)
labels_dishwasher=find_labels(dish, df_appliances_minute_test.dishwasher)
Performing a sanity check that label assignment was OK.
# Visual sanity check: state labels (top) should track the raw power (bottom).
plt.subplot(2,1,1)
plt.plot(labels_refrigerator)
plt.subplot(2,1,2)
plt.plot(df_appliances_minute_test.refrigerator.values);
What if the labeled appliance wise data was not given to us. In that case we would have resorted to applying step change on mains and trying to find out what appliance each step change conforms to. Firstly we find the change in power consumption from previous minute.
# First difference of the minute-resolution Mains 1 power: each entry is the
# change from the previous minute (candidate "step change" events).
diff_mains_1_minute=np.diff(df_mains_minute.Mains_1_Power.values)
plt.subplot(3,1,1)
plt.plot(df_mains_minute.Mains_1_Power.values,label='Power')
plt.legend();
plt.subplot(3,1,2)
plt.plot(diff_mains_1_minute,color='g',label='Delta Power')
plt.legend();
plt.subplot(3,1,3)
plt.plot(np.abs(diff_mains_1_minute),color='r',label='|Delta Power|');
plt.legend();
Finding some statistics about the step changes.
# Align the deltas with timestamps (np.diff drops the first sample, hence [1:]).
df_mains_1_delta=pd.DataFrame(diff_mains_1_minute,index=df_filtered_mains[1:].index)
df_mains_1_delta.describe()
Thus, we can see that the majority of the records (up to the 75th percentile) report a delta power of less than 0.1 W, which is an indication that the data is not very noisy. To get a better idea of what threshold to set for an actual power change occurring due to appliance switching, we decide to draw the boxplot of this series.
df_mains_1_delta.boxplot();
# Boxplot and statistics restricted to "significant" deltas (|delta| > 15 W).
df_mains_1_delta[np.abs(df_mains_1_delta.values)>15].boxplot();
df_mains_1_delta[np.abs(df_mains_1_delta.values)>15].describe()
plt.figsize(10,7)
plt.subplot(6,1,1)
df_mains_1_delta_sig=df_mains_1_delta[np.abs(df_mains_1_delta)>15]
# Overlay the significant step changes (green dots) on the Mains 1 trace
# for a single day (the 18th).
x=df_mains_1_delta_sig[df_mains_1_delta_sig.index.day==18].index
y=np.abs(df_mains_1_delta_sig[df_mains_1_delta_sig.index.day==18].values)
plt.plot(x,y,'go')
df_18=df_filtered_mains[df_filtered_mains.index.day==18]
plt.plot(df_18.index,df_18.Mains_1_Power.values)
times_step_changes=df_mains_1_delta_sig.index
plt.subplot(6,1,2)
plt.plot(df_filtered_mains[df_filtered_mains.index.day==18].index,df_filtered_mains[df_filtered_mains.index.day==18].values)
plt.subplot(6,1,3)
plt.plot(df_appliances_minute[df_appliances_minute.index.day==18].kitchen.index,df_appliances_minute[df_appliances_minute.index.day==18].kitchen.values)
# Cluster the very large step changes (|delta| > 800 W) with DBSCAN applied
# to a similarity matrix derived from the pairwise distances.
X=df_mains_1_delta[np.abs(df_mains_1_delta.values)>800].values
D = distance.squareform(distance.pdist(X))
S = 1 - (D / np.max(D))
db = DBSCAN(eps=0.05, min_samples=2).fit(S)
core_samples = db.core_sample_indices_
labels = db.labels_
# Number of clusters, excluding the DBSCAN noise label (-1).
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
print n_clusters_
print core_samples,len(core_samples)
from itertools import cycle
colors = cycle('bgrcmybgrcmybgrcmybgrcmy')
for k, col in zip(set(labels), colors):
    if k == -1:
        # Black used for noise.
        col = 'k'
        markersize = 6
    class_members = [index[0] for index in np.argwhere(labels == k)]
    cluster_core_samples = [index for index in core_samples
                            if labels[index] == k]
    for index in class_members:
        x = X[index]
        # Core samples of a real cluster are drawn larger than border/noise.
        if index in core_samples and k != -1:
            markersize = 14
        else:
            markersize = 6
        plt.plot(x[0], 'o',
                 markeredgecolor='k', markersize=markersize)
Thus, we can see that we can reduce the whole dataset — 14 days of data at minute resolution — to about 1300 switch events where the power change is >= 15 W.
We now try to cluster the delta changes and see if we can infer more information from the same, we shall also later see how we can use this analysis in step change algorithm.
For Mains 1, we now draw the state space, which consists of all possible combinations of appliances in their different states. We also show the corresponding histogram, which tells how close the different states are. The closer the states, the more difficult the disaggregation becomes.
# State space for Mains 1: every combination of (kitchen, stove, kitchen_2,
# dishwasher) states, and the total power each combination would draw.
states_combination=list(itertools.product(kitchen,stove,kitchen_2,dish))
print "The possible different state combinations are\n Kitchen, Stove, Kitchen 2, Dishwasher\n",states_combination
sum_combination=np.array(np.zeros(len(states_combination)))
for i in range(0,len(states_combination)):
    sum_combination[i]=sum(states_combination[i])
from copy import deepcopy
b=deepcopy(sum_combination)
b.sort()
# NOTE(review): grid()/title() are pylab globals; assumes `from pylab import *`.
grid(True)
title('Sorted possible sums of all appliances');
plt.xlabel('Combinations');
plt.ylabel('Power (W)');
plt.plot(b,'go-',markersize=8,linewidth=3);
plt.title('Histogram showing relative frequencies of different state combinations');
plt.hist(b,20);
Now we basically need to iterate over all our sample and see which of the possible state assignment matches closest to the overall power consumption. We thus create a function to do the same.
def find_nearest(array, value):
    """Return [index, residual] for the element of `array` closest to `value`.

    residual = value - array[index], i.e. the power left unexplained by the
    chosen state combination.
    """
    best = np.abs(array - value).argmin()
    residual = value - array[best]
    return [best, residual]
We thus apply this technique to find the residual power left from the closest state assignment.
# For every test sample, pick the state combination whose total power is
# closest to the observed Mains 1 power; keep the index and the residual.
residual_power_mains_1=np.zeros(len(df_filtered_mains_test.Mains_1_Power.values))
states_idx=np.zeros(len(df_filtered_mains_test.Mains_1_Power.values))
for i in range(len(df_filtered_mains_test.Mains_1_Power.values)):
    [states_idx[i],residual_power_mains_1[i]]=find_nearest(sum_combination,df_filtered_mains_test.Mains_1_Power.values[i])
# The same assignment on the unfiltered test mains.
residual_power_mains_1_actual=np.zeros(len(df_test.Mains_1_Power.values))
states_idx_actual=np.zeros(len(df_test.Mains_1_Power.values))
for i in range(len(df_test.Mains_1_Power.values)):
    [states_idx_actual[i],residual_power_mains_1_actual[i]]=find_nearest(sum_combination,df_test.Mains_1_Power.values[i])
title('Residual Power Mains 1');
xlabel('Time');
ylabel('Power (W)');
plot(residual_power_mains_1);
However, we can also impose another condition: that the sum of the powers of all the appliances is strictly less than the aggregate power observed. We can create a function for that as follows.
def find_nearest_positive(array, value):
    """Return [index, residual] of the closest element of `array` that does
    not exceed `value`, so the predicted total appliance power never
    overshoots the observed aggregate.

    residual = value - array[index].  If no element is <= value, fall back
    to [0, 0] (same fallback as the original implementation).
    """
    # Indices of all state combinations whose power fits under `value`.
    candidates = np.where(array - value <= 0.0)[0]
    if len(candidates) == 0:
        # Explicit guard replaces the original bare `except:` which silently
        # swallowed *every* error, not just the empty-candidate case.
        return [0, 0]
    best_candidate = np.abs(array[candidates] - value).argmin()
    # Index directly into the candidate list instead of searching the full
    # array for an equal value (equivalent result, no equality scan).
    idx = candidates[best_candidate]
    diff = array[idx] - value
    return [idx, -diff]
# Combinatorial optimisation with the extra constraint that the summed
# appliance power must not exceed the observed aggregate power.
residual_power_mains_1_positive=np.zeros(len(df_filtered_mains_test.Mains_1_Power.values))
states_idx_positive=np.zeros(len(df_filtered_mains_test.Mains_1_Power.values))
residual_power_mains_2=np.zeros(len(df_filtered_mains_test.Mains_2_Power.values))
states_idx_2=np.zeros(len(df_filtered_mains_test.Mains_2_Power.values))
t0=time.time()
for i in range(len(df_filtered_mains_test.Mains_1_Power.values)):
    [states_idx_positive[i],residual_power_mains_1_positive[i]]=find_nearest_positive(sum_combination,df_filtered_mains_test.Mains_1_Power.values[i])
t1=time.time()
print "Time taken for CO: ",t1-t0
title('Residual Power Mains 1 considering sum of appliance powers < Aggregate Power');
xlabel('Time');
ylabel('Power (W)');
grid(True);
plot(residual_power_mains_1_positive);
After having applied Total Load Model, we need to assign different states to different appliances.
# Pre-allocate per-appliance state/power arrays for the filtered test series.
# NOTE(review): np.int is the builtin int alias (removed in NumPy >= 1.24).
length_sequence=len(df_filtered_mains_test.Mains_1_Power.values)
co_kitchen_states=np.zeros(length_sequence,dtype=np.int)
co_kitchen_power=np.zeros(length_sequence)
co_stove_states=np.zeros(length_sequence,dtype=np.int)
co_stove_power=np.zeros(length_sequence)
co_kitchen_2_states=np.zeros(length_sequence,dtype=np.int)
co_kitchen_2_power=np.zeros(length_sequence)
co_dish_states=np.zeros(length_sequence,dtype=np.int)
co_dish_power=np.zeros(length_sequence)
# Same allocations sized for the unfiltered test series.
length_sequence=len(df_test.Mains_1_Power.values)
co_kitchen_states_actual=np.zeros(length_sequence,dtype=np.int)
co_kitchen_power_actual=np.zeros(length_sequence)
co_stove_states_actual=np.zeros(length_sequence,dtype=np.int)
co_stove_power_actual=np.zeros(length_sequence)
co_kitchen_2_states_actual=np.zeros(length_sequence,dtype=np.int)
co_kitchen_2_power_actual=np.zeros(length_sequence)
co_dish_states_actual=np.zeros(length_sequence,dtype=np.int)
co_dish_power_actual=np.zeros(length_sequence)
# Decode each combination index into per-appliance states.  The index is a
# mixed-radix number over (kitchen:2, stove:2, kitchen_2:3, dish:3) states:
#   idx = kitchen*18 + stove*9 + kitchen_2*3 + dish
# NOTE: `/` is Python 2 integer division here.
for i in range(length_sequence):
    if int(states_idx[i])/18==0:
        co_kitchen_states[i]=0
    else:
        co_kitchen_states[i]=1
    co_kitchen_power[i]=kitchen[co_kitchen_states[i]]
    temp=int(states_idx[i])/9
    if temp%2==0:
        co_stove_states[i]=0
    else:
        co_stove_states[i]=1
    co_stove_power[i]=stove[co_stove_states[i]]
    temp=int(states_idx[i])/3
    if temp%3==0:
        co_kitchen_2_states[i]=0
    elif temp%3==1:
        co_kitchen_2_states[i]=1
    else:
        co_kitchen_2_states[i]=2
    co_kitchen_2_power[i]=kitchen_2[co_kitchen_2_states[i]]
    temp=int(states_idx[i])%3
    if temp==0:
        co_dish_states[i]=0
    elif temp==1:
        co_dish_states[i]=1
    else:
        co_dish_states[i]=2
    co_dish_power[i]=dish[co_dish_states[i]]
# Same mixed-radix decode for the assignment on the unfiltered test mains.
for i in range(length_sequence):
    if int(states_idx_actual[i])/18==0:
        co_kitchen_states_actual[i]=0
    else:
        co_kitchen_states_actual[i]=1
    co_kitchen_power_actual[i]=kitchen[co_kitchen_states_actual[i]]
    temp=int(states_idx_actual[i])/9
    if temp%2==0:
        co_stove_states_actual[i]=0
    else:
        co_stove_states_actual[i]=1
    co_stove_power_actual[i]=stove[co_stove_states_actual[i]]
    temp=int(states_idx_actual[i])/3
    if temp%3==0:
        co_kitchen_2_states_actual[i]=0
    elif temp%3==1:
        co_kitchen_2_states_actual[i]=1
    else:
        co_kitchen_2_states_actual[i]=2
    co_kitchen_2_power_actual[i]=kitchen_2[co_kitchen_2_states_actual[i]]
    temp=int(states_idx_actual[i])%3
    if temp==0:
        co_dish_states_actual[i]=0
    elif temp==1:
        co_dish_states_actual[i]=1
    else:
        co_dish_states_actual[i]=2
    co_dish_power_actual[i]=dish[co_dish_states_actual[i]]
Now we compare the produced output with Ground truth.
# Ground truth (top) vs predicted consumption (bottom), one figure per
# appliance on Mains 1: dishwasher, kitchen, kitchen 2, stove.
subplot(2,1,1);
plt.title('Actual Dishwasher Consumption');
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.dishwasher);
subplot(2,1,2);
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Observed Dishwasher Consumption');
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_dish_power_actual);
subplot(2,1,1);
plt.title('Actual Kitchen Consumption');
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.kitchen);
subplot(2,1,2);
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Observed Kitchen Consumption');
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_kitchen_power_actual);
subplot(2,1,1);
plt.title('Actual Kitchen 2 Consumption');
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.kitchen_2);
subplot(2,1,2);
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Observed Kitchen 2 Consumption');
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_kitchen_2_power_actual);
subplot(2,1,1);
plt.title('Actual Stove Consumption');
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.stove);
subplot(2,1,2);
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Observed Stove Consumption');
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_stove_power_actual);
Calculation of Residual Power and Standard Deviation
# Aggregate residual statistics for the unconstrained CO assignment.
residual_power_total=sum(np.abs(residual_power_mains_1))
print "Residual Power is: ",residual_power_total," W"
standard_deviation=np.std(residual_power_mains_1)
print "Standard Deviation is ",standard_deviation
Next, we define a function to plot the confusion matrix.
def print_confusion_matrix(appliance, num_states, true_label, observed_label):
    """Plot a row-normalised confusion matrix for one appliance and return
    the overall classification accuracy.

    Parameters:
        appliance: name used in the plot title.
        num_states: number of states (rows/columns of the matrix, <= 3).
        true_label: array of ground-truth state labels per sample.
        observed_label: array of predicted state labels per sample.

    Returns correct_predictions / total_samples as a float.
    """
    correct_predicted = 0
    conf_arr = []
    for i in range(num_states):
        counts = []
        # Samples whose true state is i (loop-invariant for the inner loop).
        idx = np.where(true_label == i)[0]
        for j in range(num_states):
            counts.append(len(np.where(observed_label[idx] == j)[0]))
        # Diagonal entry = correctly predicted samples for state i.
        correct_predicted += counts[i]
        conf_arr.append(counts)
    norm_conf = []
    for row in conf_arr:
        total = sum(row)
        # Guard against a state that never occurs in the ground truth
        # (the original divided by zero in that case).
        norm_conf.append([float(c) / total if total else 0.0 for c in row])
    fig = plt.figure()
    plt.clf()
    ax = fig.add_subplot(111)
    ax.set_aspect(1)
    res = ax.imshow(np.array(norm_conf), cmap=plt.cm.jet,
                    interpolation='nearest')
    width = len(conf_arr)
    height = len(conf_arr[0])
    # `range` (not py2-only `xrange`) keeps this cell valid on Python 2 and 3.
    for x in range(width):
        for y in range(height):
            ax.annotate(str(conf_arr[x][y]), xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center')
    cb = fig.colorbar(res)
    alphabet = ['State 1','State 2','State 3']
    plt.title('Confusion Matrix for '+appliance)
    plt.xticks(range(width), alphabet[:width])
    plt.yticks(range(height), alphabet[:height])
    plt.show()
    return correct_predicted*1.0/len(true_label)
Plotting the confusion matrices for the different appliances on Mains 1.
# Confusion matrices: CO prediction on filtered mains vs ground truth.
stove_accuracy=print_confusion_matrix("stove",len(stove),labels_stove,co_stove_states)
kitchen_accuracy=print_confusion_matrix("kitchen",len(kitchen),labels_kitchen,co_kitchen_states)
kitchen_2_accuracy=print_confusion_matrix("kitchen_2",len(kitchen_2),labels_kitchen_2,co_kitchen_2_states)
dishwasher_accuracy=print_confusion_matrix("dishwasher",len(dish),labels_dishwasher,co_dish_states)
print stove_accuracy
# Same comparison against the assignment computed on the unfiltered mains.
stove_accuracy=print_confusion_matrix("stove",len(stove),labels_stove,co_stove_states_actual)
kitchen_accuracy=print_confusion_matrix("kitchen",len(kitchen),labels_kitchen,co_kitchen_states_actual)
kitchen_2_accuracy=print_confusion_matrix("kitchen_2",len(kitchen_2),labels_kitchen_2,co_kitchen_2_states_actual)
dishwasher_accuracy=print_confusion_matrix("dishwasher",len(dish),labels_dishwasher,co_dish_states_actual)
Finding the violations of switch continuity principle. Basically need to find the times when more than one appliance changed state. A simple algo for this is:
def switch_continuity(list_appliances_states):
    """Count time steps at which more than one appliance changed state,
    i.e. violations of the switch-continuity principle.

    `list_appliances_states` is a list of equal-length per-appliance state
    sequences.
    """
    # One row per appliance: 1 wherever that appliance changed state.
    transitions = [np.abs(np.diff(states)) for states in list_appliances_states]
    changes_per_step = np.sum(transitions, axis=0)
    return int((changes_per_step > 1).sum())
# Count and report switch-continuity violations for the Mains 1 assignment.
switch_continuity_violations=switch_continuity([co_stove_states,co_kitchen_states,co_kitchen_2_states,co_dish_states])
print "Violation ",switch_continuity_violations
print "%Violation ",switch_continuity_violations*100.0/len(co_stove_states)
We now repeat the same procedure when we take the assumption that the sum of powers of different appliances must be less than or equal to the aggregate power.
# Per-appliance arrays and mixed-radix decode for the positive-residual CO
# assignment (sum of appliance powers constrained to <= aggregate power).
co_positive_kitchen_states=np.zeros(length_sequence,dtype=np.int)
co_positive_kitchen_power=np.zeros(length_sequence)
co_positive_stove_states=np.zeros(length_sequence,dtype=np.int)
co_positive_stove_power=np.zeros(length_sequence)
co_positive_kitchen_2_states=np.zeros(length_sequence,dtype=np.int)
co_positive_kitchen_2_power=np.zeros(length_sequence)
co_positive_dish_states=np.zeros(length_sequence,dtype=np.int)
co_positive_dish_power=np.zeros(length_sequence)
# idx = kitchen*18 + stove*9 + kitchen_2*3 + dish (Python 2 integer `/`).
for i in range(length_sequence):
    if int(states_idx_positive[i])/18==0:
        co_positive_kitchen_states[i]=0
    else:
        co_positive_kitchen_states[i]=1
    co_positive_kitchen_power[i]=kitchen[co_positive_kitchen_states[i]]
    temp=int(states_idx_positive[i])/9
    if temp%2==0:
        co_positive_stove_states[i]=0
    else:
        co_positive_stove_states[i]=1
    co_positive_stove_power[i]=stove[co_positive_stove_states[i]]
    temp=int(states_idx_positive[i])/3
    if temp%3==0:
        co_positive_kitchen_2_states[i]=0
    elif temp%3==1:
        co_positive_kitchen_2_states[i]=1
    else:
        co_positive_kitchen_2_states[i]=2
    co_positive_kitchen_2_power[i]=kitchen_2[co_positive_kitchen_2_states[i]]
    temp=int(states_idx_positive[i])%3
    if temp==0:
        co_positive_dish_states[i]=0
    elif temp==1:
        co_positive_dish_states[i]=1
    else:
        co_positive_dish_states[i]=2
    co_positive_dish_power[i]=dish[co_positive_dish_states[i]]
# Ground truth (top) vs positive-CO prediction (bottom), one figure per
# appliance: stove, dishwasher, kitchen, kitchen 2; then confusion matrices.
subplot(2,1,1);
plt.title('Actual Stove Consumption');
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.stove);
subplot(2,1,2);
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Observed Stove Consumption');
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_positive_stove_power);
subplot(2,1,1);
plt.title('Actual Dishwasher Consumption');
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.dishwasher);
subplot(2,1,2);
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Observed Dishwasher Consumption');
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_positive_dish_power);
subplot(2,1,1);
plt.title('Actual Kitchen Consumption');
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.kitchen);
subplot(2,1,2);
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Observed Kitchen Consumption');
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_positive_kitchen_power);
subplot(2,1,1);
plt.title('Actual Kitchen 2 Consumption');
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.kitchen_2);
subplot(2,1,2);
plt.ylim((0,1000));
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Observed Kitchen 2 Consumption');
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_positive_kitchen_2_power);
stove_positive_accuracy=print_confusion_matrix("stove",len(stove),labels_stove,co_positive_stove_states)
kitchen_positive_accuracy=print_confusion_matrix("kitchen",len(kitchen),labels_kitchen,co_positive_kitchen_states)
kitchen_positive_2_accuracy=print_confusion_matrix("kitchen_2",len(kitchen_2),labels_kitchen_2,co_positive_kitchen_2_states)
dishwasher_positive_accuracy=print_confusion_matrix("dishwasher",len(dish),labels_dishwasher,co_positive_dish_states)
residual_power_positive_total=sum(np.abs(residual_power_mains_1))
print residual_power_positive_total
residual_power_total=sum(np.abs(residual_power_positive_total))
print "Residual Power is: ",residual_power_positive_total," W"
standard_deviation=np.std(residual_power_mains_1_positive)
print "Standard Deviation is ",standard_deviation
Now we see how taking the actual mains, as in case 1, affects the results — that is, how the results would be affected if we did not filter the data.
# Positive-residual CO on the unfiltered (downsampled) mains.
# NOTE(review): `df_a` and `df[i]` look like leftovers; judging by the loop
# bound they presumably should be `downsampled_mains_1` -- confirm before use.
residual_power_mains_1_actual=np.zeros(len(df_a))
states_idx_actual=np.zeros(len(downsampled_mains_1))
for i in range(len(downsampled_mains_1)):
    [states_idx_actual[i],residual_power_mains_1_actual[i]]=find_nearest_positive(sum_combination,df[i])
# Per-appliance arrays and mixed-radix decode for the unfiltered-mains run.
co_kitchen_actual_states=np.zeros(length_sequence,dtype=np.int)
co_kitchen_actual_power=np.zeros(length_sequence)
co_stove_actual_states=np.zeros(length_sequence,dtype=np.int)
co_stove_actual_power=np.zeros(length_sequence)
co_kitchen_2_actual_states=np.zeros(length_sequence,dtype=np.int)
co_kitchen_2_actual_power=np.zeros(length_sequence)
co_dish_actual_states=np.zeros(length_sequence,dtype=np.int)
co_dish_actual_power=np.zeros(length_sequence)
# idx = kitchen*18 + stove*9 + kitchen_2*3 + dish (Python 2 integer `/`).
for i in range(length_sequence):
    if int(states_idx_actual[i])/18==0:
        co_kitchen_actual_states[i]=0
    else:
        co_kitchen_actual_states[i]=1
    co_kitchen_actual_power[i]=kitchen[co_kitchen_actual_states[i]]
    temp=int(states_idx_actual[i])/9
    if temp%2==0:
        co_stove_actual_states[i]=0
    else:
        co_stove_actual_states[i]=1
    co_stove_actual_power[i]=stove[co_stove_actual_states[i]]
    temp=int(states_idx_actual[i])/3
    if temp%3==0:
        co_kitchen_2_actual_states[i]=0
    elif temp%3==1:
        co_kitchen_2_actual_states[i]=1
    else:
        co_kitchen_2_actual_states[i]=2
    co_kitchen_2_actual_power[i]=kitchen_2[co_kitchen_2_actual_states[i]]
    temp=int(states_idx_actual[i])%3
    if temp==0:
        co_dish_actual_states[i]=0
    elif temp==1:
        co_dish_actual_states[i]=1
    else:
        co_dish_actual_states[i]=2
    co_dish_actual_power[i]=dish[co_dish_actual_states[i]]
# Actual (top) vs predicted (bottom) consumption on the downsampled series:
# stove, dishwasher, kitchen 2.
plt.subplot(2,1,1)
plt.title('Actual Stove Consumption')
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_stove)
plt.subplot(2,1,2)
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Predicted Stove Consumption')
plt.plot(downsampled_timestamp_appliance_date,co_positive_stove_power);
plt.subplot(2,1,1)
plt.title('Actual Dish Washer Consumption')
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_dishwasher)
plt.subplot(2,1,2)
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Predicted Dish Washer Consumption (CO Positive)')
plt.plot(downsampled_timestamp_appliance_date,co_positive_dish_power);
plt.subplot(2,1,1)
plt.title('Actual Kitchen 2 Consumption')
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_kitchen_2)
plt.subplot(2,1,2)
plt.title('Predicted Kitchen 2 Consumption (CO Positive)')
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.ylim((0,800))
plt.plot(downsampled_timestamp_appliance_date,co_positive_kitchen_2_power);
# Confusion matrices for the positive-residual CO assignment.
stove_positive_accuracy=print_confusion_matrix("stove",len(stove),labels_stove,co_positive_stove_states)
kitchen_positive_accuracy=print_confusion_matrix("kitchen",len(kitchen),labels_kitchen,co_positive_kitchen_states)
kitchen_positive_2_accuracy=print_confusion_matrix("kitchen_2",len(kitchen_2),labels_kitchen_2,co_positive_kitchen_2_states)
# Bug fix: `labels_dish` is never defined; the ground-truth dishwasher labels
# are stored in `labels_dishwasher` (cf. the identical call earlier).
dishwasher_positive_accuracy=print_confusion_matrix("dishwasher",len(dish),labels_dishwasher,co_positive_dish_states)
We now do the same analysis for Mains 2 as we did for Mains 1.
Distribution of states space
# State space for Mains 2: all combinations of (refrigerator, lighting,
# microwave) states and the total power each combination would draw.
states_combination_2=list(itertools.product(ref,lighting,micro))
print "Possible state combinations for Mains2 are\nRef.,Lighting,Microwave\n",states_combination_2
sum_combination_2=np.array(np.zeros(len(states_combination_2)))
for i in range(0,len(states_combination_2)):
    sum_combination_2[i]=sum(states_combination_2[i])
from copy import deepcopy
b=deepcopy(sum_combination_2)
b.sort()
# NOTE(review): grid()/title()/plot()/hist() are pylab globals.
grid(True);
title('Sorted possible sums of all appliances');
xlabel('Combinations');
ylabel('Power (W)');
plot(b,'go-');
title('Histogram showing relative frequencies of different state combinations');
hist(b,30);
# Pre-allocate the per-appliance state/power arrays for the CO decoding of
# Mains 2, one entry per sample of the (filtered) test mains sequence.
length_sequence=len(df_filtered_mains_test.Mains_2_Power.values)
co_ref_states, co_micro_states, co_lighting_states = (
    np.zeros(length_sequence, dtype=np.int) for _ in range(3))
co_ref_power, co_micro_power, co_lighting_power = (
    np.zeros(length_sequence) for _ in range(3))
# Same set of buffers for the decoding run on the unfiltered ("actual") data.
length_sequence=len(df_filtered_mains_test.Mains_2_Power.values)
co_ref_states_actual, co_micro_states_actual, co_lighting_states_actual = (
    np.zeros(length_sequence, dtype=np.int) for _ in range(3))
co_ref_power_actual, co_micro_power_actual, co_lighting_power_actual = (
    np.zeros(length_sequence) for _ in range(3))
# Joint-state index chosen by CO and the residual (unexplained) power per sample.
states_idx_2 = np.zeros(len(df_filtered_mains_test.Mains_2_Power.values))
residual_power_mains_2 = np.zeros(len(df_filtered_mains_test.Mains_2_Power.values))
# Combinatorial Optimisation step: for every mains-2 sample pick the joint
# state whose aggregate power is nearest to the observed power.
# `find_nearest` is defined earlier in the notebook (not visible here).
t0=time.time()
for i in range(len(df_filtered_mains_test.Mains_2_Power.values)):
    [states_idx_2[i],residual_power_mains_2[i]]=find_nearest(sum_combination_2,df_filtered_mains_test.Mains_2_Power.values[i])
t1=time.time()
print "Time taken for CO Mains 2 :",t1-t0
# Same CO step on the raw (unfiltered) test data.
states_idx_2_actual=np.zeros(len(df_test.Mains_2_Power.values))
residual_power_mains_2_actual=np.zeros(len(df_test.Mains_2_Power.values))
t0=time.time()
for i in range(len(df_test.Mains_2_Power.values)):
    [states_idx_2_actual[i],residual_power_mains_2_actual[i]]=find_nearest(sum_combination_2,df_test.Mains_2_Power.values[i])
t1=time.time()
print "Time taken for CO Mains 2 :",t1-t0
# Residual = observed mains power minus the chosen combination's power.
title('Residual Power Mains 2');
xlabel('Time');
ylabel('Power (W)');
plot(residual_power_mains_2);
# Decode each CO-chosen joint state index (0..26) into per-appliance states.
# The combination order is (ref, lighting, micro), 3 levels each, so the index
# is a base-3 number: ref is the most-significant digit, micro the least.
for i in range(length_sequence):
    idx = int(states_idx_2_actual[i])
    co_ref_states_actual[i] = idx // 9              # most-significant base-3 digit
    co_ref_power_actual[i] = ref[co_ref_states_actual[i]]
    co_lighting_states_actual[i] = (idx // 3) % 3   # middle digit
    co_lighting_power_actual[i] = lighting[co_lighting_states_actual[i]]
    co_micro_states_actual[i] = idx % 3             # least-significant digit
    co_micro_power_actual[i] = micro[co_micro_states_actual[i]]
# --- Actual vs. CO-predicted plots for the Mains 2 appliances ---
# Refrigerator: actual (top, clipped to 1000 W) vs. CO prediction (bottom).
plt.subplot(2,1,1)
ylim((0,1000))
plt.title('Actual Ref Consumption')
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.refrigerator.values,linewidth=0.8)
plt.subplot(2,1,2)
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Predicted Ref Consumption (CO)')
plt.plot(df_appliances_minute_test.refrigerator.index.to_pydatetime(),co_ref_power,linewidth=0.8);
# Lighting: actual vs. CO prediction.
plt.subplot(2,1,1)
plt.title('Actual Lighting Consumption')
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.light.values,linewidth=0.8)
plt.subplot(2,1,2)
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Predicted Ref Lighting (CO)')
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_lighting_power,linewidth=0.8);
# Microwave: actual vs. CO prediction.
plt.subplot(2,1,1)
plt.title('Actual Microwave Consumption')
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.microwave.values,linewidth=0.8)
plt.subplot(2,1,2)
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Predicted Microwave Consumption (CO)')
plt.plot(df_appliances_minute_test.index.to_pydatetime(),co_micro_power,linewidth=0.8);
# Accuracies for the filtered-data CO run...
ref_accuracy=print_confusion_matrix("Ref",len(ref),labels_refrigerator,co_ref_states)
micro_accuracy=print_confusion_matrix("Micro",len(micro),labels_microwave,co_micro_states)
lighting_accuracy=print_confusion_matrix("Lighting",len(lighting),labels_light,co_lighting_states)
# ...and for the unfiltered ("actual") run. Note the same variable names are
# overwritten, so only the second set of accuracies survives.
ref_accuracy=print_confusion_matrix("Ref",len(ref),labels_refrigerator,co_ref_states_actual)
micro_accuracy=print_confusion_matrix("Micro",len(micro),labels_microwave,co_micro_states_actual)
lighting_accuracy=print_confusion_matrix("Lighting",len(lighting),labels_light,co_lighting_states_actual)
# Bare expression — notebook cell output displaying the refrigerator series.
df_appliances_minute_test.refrigerator
# Total absolute disaggregation error summed over all seven appliances.
# First version keeps pandas Series (index-aligned arithmetic)...
num=np.abs(co_stove_power_actual-df_appliances_minute_test.stove)+np.abs(co_dish_power_actual-df_appliances_minute_test.dishwasher)+\
np.abs(co_lighting_power_actual-df_appliances_minute_test.light)+np.abs(co_kitchen_2_power_actual-df_appliances_minute_test.kitchen_2)+\
np.abs(co_kitchen_power_actual-df_appliances_minute_test.kitchen)+np.abs(co_ref_power_actual-df_appliances_minute_test.refrigerator)+\
np.abs(co_micro_power_actual-df_appliances_minute_test.microwave)
num
# ...second version uses .values (plain ndarrays, positional alignment).
num2=np.abs(co_stove_power_actual-df_appliances_minute_test.stove.values)+np.abs(co_dish_power_actual-df_appliances_minute_test.dishwasher.values)+\
np.abs(co_lighting_power_actual-df_appliances_minute_test.light.values)+np.abs(co_kitchen_2_power_actual-df_appliances_minute_test.kitchen_2.values)+\
np.abs(co_kitchen_power_actual-df_appliances_minute_test.kitchen.values)+np.abs(co_ref_power_actual-df_appliances_minute_test.refrigerator.values)+\
np.abs(co_micro_power_actual-df_appliances_minute_test.microwave.values)
num2
numerator=np.sum(num2)
print numerator
# `deno` is computed but the final expression re-derives the denominator inline.
deno=np.sum(df_mains_test.Mains_1_Power.values)
print df_mains_test.Mains_1_Power.sum()
print df_mains_test.Mains_2_Power.sum()
# Normalised error: total absolute error / total mains energy (both mains).
numerator/(1*(df_mains_test.Mains_1_Power.sum()+df_mains_test.Mains_2_Power.sum()))
# Sanity check: per-timestamp sum of sub-metered appliances vs. sum of mains.
overall_sum_appliances=df_appliances_minute_test.sum(axis=1)
overall_sum_appliances
overall_sum_mains=df_mains_test.sum(axis=1)
overall_sum_mains
# Std-dev of the (appliance total - mains total) gap; despite the name this is
# a standard deviation, not an RMS.
overall_rms=overall_sum_appliances-overall_sum_mains
value=overall_rms.std()
value
np.std(co_stove_power_actual-df_appliances_minute_test.stove.values)
# Stove: normalised energy error = sum|pred - actual| / sum(actual).
num_stove=np.sum(np.abs(co_stove_power_actual-df_appliances_minute_test.stove.values))
print num_stove
deno_stove=np.sum(df_appliances_minute_test.stove.values)
print deno_stove
stove_energy=num_stove/deno_stove
stove_energy
# Dishwasher: same normalised energy error plus error std-dev.
num_dishwasher=np.sum(np.abs(co_dish_power_actual-df_appliances_minute_test.dishwasher.values))
print num_dishwasher
deno_dishwasher=np.sum(df_appliances_minute_test.dishwasher.values)
print deno_dishwasher
dishwasher_energy=num_dishwasher/deno_dishwasher
print dishwasher_energy
print np.std(co_dish_power_actual-df_appliances_minute_test.dishwasher.values)
TODO: Violation of switch continuity principle
# Normalised energy error (sum|pred - actual| / sum(actual)) and error std-dev
# for each remaining appliance.
# Refrigerator.
num_refrigerator=np.sum(np.abs(co_ref_power_actual-df_appliances_minute_test.refrigerator.values))
print num_refrigerator
deno_refrigerator=np.sum(df_appliances_minute_test.refrigerator.values)
print deno_refrigerator
refrigerator_energy=num_refrigerator/deno_refrigerator
print refrigerator_energy
print np.std(np.abs(co_ref_power_actual-df_appliances_minute_test.refrigerator.values))
# Microwave.
num_microwave=np.sum(np.abs(co_micro_power_actual-df_appliances_minute_test.microwave.values))
print num_microwave
deno_microwave=np.sum(df_appliances_minute_test.microwave.values)
print deno_microwave
microwave_energy=num_microwave/deno_microwave
print microwave_energy
print np.std(np.abs(co_micro_power_actual-df_appliances_minute_test.microwave.values))
# Lighting.
num_lighting=np.sum(np.abs(co_lighting_power_actual-df_appliances_minute_test.light.values))
print num_lighting
deno_lighting=np.sum(df_appliances_minute_test.light.values)
print deno_lighting
lighting_energy=num_lighting/deno_lighting
print lighting_energy
print np.std(np.abs(co_lighting_power_actual-df_appliances_minute_test.light.values))
# Kitchen.
num_kitchen=np.sum(np.abs(co_kitchen_power_actual-df_appliances_minute_test.kitchen.values))
print num_kitchen
deno_kitchen=np.sum(df_appliances_minute_test.kitchen.values)
print deno_kitchen
kitchen_energy=num_kitchen/deno_kitchen
print kitchen_energy
print np.std(np.abs(co_kitchen_power_actual-df_appliances_minute_test.kitchen.values))
# Kitchen 2.
num_kitchen_2=np.sum(np.abs(co_kitchen_2_power_actual-df_appliances_minute_test.kitchen_2.values))
print num_kitchen_2
deno_kitchen_2=np.sum(df_appliances_minute_test.kitchen_2.values)
print deno_kitchen_2
kitchen_2_energy=num_kitchen_2/deno_kitchen_2
print kitchen_2_energy
print np.std(np.abs(co_kitchen_2_power_actual-df_appliances_minute_test.kitchen_2.values))
# Visual spot-check of a 2000-sample window: predicted (top) vs. actual (bottom).
plt.subplot(2,1,1)
plt.plot(co_kitchen_2_power_actual[2000:4000])
plt.subplot(2,1,2)
plt.plot(df_appliances_minute_test.kitchen_2.values[2000:4000]);
# Joint state space over ALL seven appliances (both mains combined):
# product order (kitchen, stove, kitchen_2, dish, ref, lighting, micro).
# With sizes (2,2,3,3,3,3,3) this gives 972 combinations.
states_combination=list(itertools.product(kitchen,stove,kitchen_2,dish,ref,lighting,micro))
len(states_combination)
co_kitchen_actual_states=np.zeros(length_sequence,dtype=np.int)
co_kitchen_actual_power=np.zeros(length_sequence)
co_stove_actual_states=np.zeros(length_sequence,dtype=np.int)
co_stove_actual_power=np.zeros(length_sequence)
co_kitchen_2_actual_states=np.zeros(length_sequence,dtype=np.int)
co_kitchen_2_actual_power=np.zeros(length_sequence)
co_dish_actual_states=np.zeros(length_sequence,dtype=np.int)
co_dish_actual_power=np.zeros(length_sequence)
states_idx_2_actual=np.zeros(len(df_test.Mains_2_Power.values))
residual_power_mains_2_actual=np.zeros(len(df_test.Mains_2_Power.values))
# CO over the SUM of both mains against the 7-appliance combination sums.
# NOTE(review): `sum_combination` must be the aggregate power of
# `states_combination` (built elsewhere); confirm it is not the Mains-1-only
# version from earlier in the notebook.
t0=time.time()
for i in range(len(df_test.Mains_2_Power.values)):
    [states_idx_2_actual[i],residual_power_mains_2_actual[i]]=find_nearest(sum_combination,df_test.Mains_2_Power.values[i]+df_test.Mains_1_Power.values[i])
t1=time.time()
print "Time taken for CO Mains 2 :",t1-t0
# Decode the joint CO state index (0..971) into per-appliance states.
# The product order is (kitchen, stove, kitchen_2, dish, ref, lighting, micro)
# with sizes (2,2,3,3,3,3,3), so the index is a mixed-radix number with
# strides: kitchen 486, stove 243, kitchen_2 81, dish 27, ref 9, lighting 3,
# micro 1.
# NOTE(review): length_sequence was computed from df_filtered_mains_test;
# confirm it matches len(states_idx_2_actual) (built from df_test).
for i in range(length_sequence):
    idx = int(states_idx_2_actual[i])
    # BUG FIX: the original used idx/9 without %3 for the refrigerator, which
    # is only valid for a 27-state space; for the 972-state space every index
    # >= 27 was mis-decoded (e.g. idx 36 gave state 2 instead of 1).
    co_ref_states_actual[i] = (idx // 9) % 3
    co_ref_power_actual[i] = ref[co_ref_states_actual[i]]
    co_lighting_states_actual[i] = (idx // 3) % 3
    co_lighting_power_actual[i] = lighting[co_lighting_states_actual[i]]
    co_micro_states_actual[i] = idx % 3
    co_micro_power_actual[i] = micro[co_micro_states_actual[i]]
    co_kitchen_actual_states[i] = idx // 486            # kitchen: 2 states, stride 486
    co_kitchen_actual_power[i] = kitchen[co_kitchen_actual_states[i]]
    co_stove_actual_states[i] = (idx // 243) % 2        # stove: 2 states, stride 243
    co_stove_actual_power[i] = stove[co_stove_actual_states[i]]
    co_kitchen_2_actual_states[i] = (idx // 81) % 3     # kitchen_2: 3 states, stride 81
    co_kitchen_2_actual_power[i] = kitchen_2[co_kitchen_2_actual_states[i]]
    # BUG FIX: the dish washer digit is (idx // 27) % 3; the original compared
    # idx // 27 directly against 0 and 1 and clamped everything else to
    # state 2, which is wrong for all indices >= 81.
    co_dish_actual_states[i] = (idx // 27) % 3
    co_dish_actual_power[i] = dish[co_dish_actual_states[i]]
# Refrigerator: actual (top) vs. CO prediction from the combined-mains run.
plt.subplot(2,1,1)
ylim((0,1000))
plt.title('Actual Ref Consumption')
plt.xlabel('Time');
plt.ylabel('Power (W)');
subplots_adjust(hspace=.5)
plt.plot(df_appliances_minute_test.index.to_pydatetime(),df_appliances_minute_test.refrigerator.values,linewidth=0.8)
plt.subplot(2,1,2)
plt.xlabel('Time');
plt.ylabel('Power (W)');
plt.title('Predicted Ref Consumption (CO)')
# NOTE(review): this plots co_ref_power (the earlier filtered-data run), not
# co_ref_power_actual produced by the loop above — confirm which was intended.
plt.plot(df_appliances_minute_test.refrigerator.index.to_pydatetime(),co_ref_power,linewidth=0.8);
# Confusion matrices / accuracies for the combined-mains CO decoding.
# BUG FIX: the stove/kitchen/kitchen_2/dish lines referenced undefined names
# (co_stove_states_actual etc.) — the arrays populated by the decoding loop
# are named co_*_actual_states — and labels_dishwasher is labels_dish
# everywhere else in this notebook. Without the fix these lines raise
# NameError.
ref_accuracy=print_confusion_matrix("Ref",len(ref),labels_refrigerator,co_ref_states_actual)
micro_accuracy=print_confusion_matrix("Micro",len(micro),labels_microwave,co_micro_states_actual)
lighting_accuracy=print_confusion_matrix("Lighting",len(lighting),labels_light,co_lighting_states_actual)
stove_accuracy=print_confusion_matrix("Stove",len(stove),labels_stove,co_stove_actual_states)
kitchen_accuracy=print_confusion_matrix("kitchen",len(kitchen),labels_kitchen,co_kitchen_actual_states)
kitchen_2_accuracy=print_confusion_matrix("kitchen_2",len(kitchen_2),labels_kitchen_2,co_kitchen_2_actual_states)
dishwasher_accuracy=print_confusion_matrix("dishwasher",len(dish),labels_dish,co_dish_actual_states)
Using EM algorithm, we try to learn HMM parameters for different appliances.
# Learn the 2-state HMM for the stove with Baum-Welch (EM); dhmm_em is
# imported elsewhere in the notebook. Arguments: observation sequence(s),
# initial prior / transition / emission matrices, max iterations, tolerance.
stove_prior=np.array([0.8,0.2])
stove_transmat=np.array([[0.9,0.1],[0.1,0.9]])
stove_emission=np.array([[.9,.1],[0.1,0.9]])
[LL, stove_learnt_prior, stove_learnt_transmat, stove_learnt_obsmat,nr_iter] = dhmm_em([labels_stove], stove_prior, stove_transmat, stove_emission, 3500,.0000001 );
title('Log Likelihood vs Iterations for Stove');
xlabel('Iterations');
ylabel('Log Likelihood');
plot(LL);
def print_hmm_parameters(obsmat,prior,transmat):
    """Pretty-print a learnt HMM's observation, prior and transition matrices."""
    print "Learnt HMM Parameters\nObservation Matrix ",obsmat,"\nPrior ",prior," \nTransition Matrix ",transmat
print_hmm_parameters(stove_learnt_obsmat,stove_learnt_prior,stove_learnt_transmat)
# Learn the 3-state HMM for the refrigerator with Baum-Welch (EM).
# (Original comment said "For Stove" — copy-paste artifact.)
ref_prior=np.array([0.9,0.05,0.05])
ref_transmat=np.array([[0.95,0.05,0],[0.05,0.9,0.05],[0.05,0.05,.9]])
ref_emission=np.array([[.99,.01,0],[0.05,0.9,.05],[0.05,0.05,.9]])
[LL, ref_learnt_prior, ref_learnt_transmat, ref_learnt_obsmat,nr_iter] = dhmm_em([labels_ref], ref_prior, ref_transmat, ref_emission, 3500,.0000001 );
title('Log Likelihood vs Iterations for Ref.');
xlabel('Iterations');
ylabel('Log Likelihood');
plot(LL);
print_hmm_parameters(ref_learnt_obsmat,ref_learnt_prior,ref_learnt_transmat)
# Learn the 3-state HMM for the microwave with Baum-Welch (EM).
# (Original comment said "For Stove" — copy-paste artifact.)
micro_prior=np.array([0.9,0.05,0.05])
micro_transmat=np.array([[0.95,0.05,0],[0.05,0.9,0.05],[0.05,0.05,.9]])
micro_emission=np.array([[.99,.01,0],[0.05,0.9,.05],[0.05,0.05,.9]])
[LL, micro_learnt_prior, micro_learnt_transmat, micro_learnt_obsmat,nr_iter] = dhmm_em([labels_micro], micro_prior, micro_transmat, micro_emission, 3500,.0000001 );
title('Log Likelihood vs Iterations for Micro');
xlabel('Iterations');
ylabel('Log Likelihood');
plot(LL);
print_hmm_parameters(micro_learnt_obsmat,micro_learnt_prior,micro_learnt_transmat)
# Learn the 3-state HMM for lighting with Baum-Welch (EM).
# (Original comment said "For Stove" — copy-paste artifact.)
lighting_prior=np.array([0.9,0.05,0.05])
lighting_transmat=np.array([[0.95,0.05,0],[0.05,0.9,0.05],[0.05,0.05,.9]])
lighting_emission=np.array([[.99,.01,0],[0.05,0.9,.05],[0.05,0.05,.9]])
[LL, lighting_learnt_prior, lighting_learnt_transmat, lighting_learnt_obsmat,nr_iter] = dhmm_em([labels_lighting], lighting_prior, lighting_transmat, lighting_emission, 3500,.0000001 );
title('Log Likelihood vs Iterations for Lighting');
xlabel('Iterations');
ylabel('Log Likelihood');
plot(LL);
print_hmm_parameters(lighting_learnt_obsmat,lighting_learnt_prior,lighting_learnt_transmat)
# Learn the 3-state HMM for the dishwasher with Baum-Welch (EM).
# (Original comment said "For Stove" — copy-paste artifact.)
dishwasher_prior=np.array([0.9,0.05,0.05])
dishwasher_transmat=np.array([[0.95,0.05,0],[0.05,0.9,0.05],[0.05,0.05,.9]])
dishwasher_emission=np.array([[.99,.01,0],[0.05,0.9,.05],[0.05,0.05,.9]])
[LL, dishwasher_learnt_prior, dishwasher_learnt_transmat, dishwasher_learnt_obsmat,nr_iter] = dhmm_em([labels_dish], dishwasher_prior, dishwasher_transmat, dishwasher_emission, 3500,.0000001 );
title('Log Likelihood vs Iterations for Dishwasher');
xlabel('Iterations');
ylabel('Log Likelihood');
plot(LL);
print_hmm_parameters(dishwasher_learnt_obsmat,dishwasher_learnt_prior,dishwasher_learnt_transmat)
# Learn the 3-state HMM for kitchen outlets 2 with Baum-Welch (EM).
# (Original comment said "For Stove" — copy-paste artifact.)
kitchen_2_prior=np.array([0.9,0.05,0.05])
kitchen_2_transmat=np.array([[0.95,0.05,0],[0.05,0.9,0.05],[0.05,0.05,.9]])
kitchen_2_emission=np.array([[.99,.01,0],[0.05,0.9,.05],[0.05,0.05,.9]])
# NOTE: the learnt prior is (mis)spelt `kichen_2_learnt_prior`; that spelling
# is what later cells must use.
[LL, kichen_2_learnt_prior, kitchen_2_learnt_transmat, kitchen_2_learnt_obsmat,nr_iter] = dhmm_em([labels_kitchen_2], kitchen_2_prior, kitchen_2_transmat, kitchen_2_emission, 3500,.0000001 );
title('Log Likelihood vs Iterations for Kitchen 2');
xlabel('Iterations');
ylabel('Log Likelihood');
plot(LL);
print_hmm_parameters(kitchen_2_learnt_obsmat,kichen_2_learnt_prior,kitchen_2_learnt_transmat)
# Learn the 2-state HMM for the kitchen circuit with Baum-Welch (EM).
# (Original comment said "For Stove" — copy-paste artifact.)
kitchen_prior=np.array([0.95,0.05])
kitchen_transmat=np.array([[0.95,0.05],[0.05,0.95]])
kitchen_emission=np.array([[.99,.01],[0.01,0.99]])
[LL, kitchen_learnt_prior, kitchen_learnt_transmat, kitchen_learnt_obsmat,nr_iter] = dhmm_em([labels_kitchen], kitchen_prior, kitchen_transmat, kitchen_emission, 3500,.0000001 );
title('Log Likelihood vs Iterations for Kitchen');
xlabel('Iterations');
ylabel('Log Likelihood');
plot(LL);
print_hmm_parameters(kitchen_learnt_obsmat,kitchen_learnt_prior,kitchen_learnt_transmat)
Combining different states according to the technique used for FHMMs. We define functions for combining the constituent priors into a joint prior for the FHMM, and similarly for the transition and emission matrices.
For mains 2
def calculate_combined_pie(ordered_list_appliance_pies):
    """Combine per-appliance prior vectors into the FHMM joint prior.

    The joint prior over the product state space is the flattened outer
    product of the individual priors, taken left to right, so the first
    appliance's state varies slowest in the combined vector.
    """
    combined = np.asarray(ordered_list_appliance_pies[0]).flatten()
    for appliance_pie in ordered_list_appliance_pies[1:]:
        # Outer product then flatten == one step of a Kronecker product.
        combined = np.outer(combined, appliance_pie).flatten()
    return combined
pie_combined=calculate_combined_pie([ref_learnt_prior,lighting_learnt_prior,micro_learnt_prior])
Combining the transition matrices and the emission matrices. Note that this combination can be computed via the Kronecker product.
def calculate_combined_A(ordered_transmat_list):
    """Combine per-appliance matrices via repeated Kronecker products.

    Works for both transition and emission matrices; the combination is
    taken left to right, so the first appliance indexes the coarsest blocks
    of the result.
    """
    combined = ordered_transmat_list[0]
    for matrix in ordered_transmat_list[1:]:
        combined = np.kron(combined, matrix)
    return combined
# Joint transition and emission matrices for the Mains 2 FHMM (27x27 each
# for three 3-state appliances).
A_combined=calculate_combined_A([ref_learnt_transmat,lighting_learnt_transmat,micro_learnt_transmat])
A_combined.shape
B_combined=calculate_combined_A([ref_learnt_obsmat,lighting_learnt_obsmat,micro_learnt_obsmat])
B_combined.shape
Now Viterbi algorithm is used to decode the most likely sequence.
from viterbi_path import path;
We plot the predicted state sequence and compare against observed state sequence.
# The CO-chosen joint state indices serve as the observation sequence.
title('Observed State Sequence');
plot(states_idx_2);
We now take the observations and map them to integers from 0 to 26, based on the closeness of the observed total power to each state combination's aggregate power.
# Viterbi decoding of the most likely joint-state sequence for Mains 2;
# `path` returns (state_sequence, ...) — only the sequence is used here.
viterbi_produced_path=path(pie_combined,A_combined,B_combined,states_idx_2)
path_produced=viterbi_produced_path[0]
title('Predicted State Sequence according to Viterbi');
plot(path_produced);
# Pre-allocate FHMM per-appliance state/power arrays for Mains 2.
# NOTE: length_sequence is rebound here to the filtered downsampled series.
length_sequence=len(filtered_downsampled_mains_2)
hmm_ref_states=np.zeros(length_sequence,dtype=np.int)
hmm_ref_power=np.zeros(length_sequence)
hmm_micro_states=np.zeros(length_sequence,dtype=np.int)
hmm_micro_power=np.zeros(length_sequence)
hmm_lighting_states=np.zeros(length_sequence,dtype=np.int)
hmm_lighting_power=np.zeros(length_sequence)
# Decode each Viterbi joint state index (0..26) into per-appliance states.
# Combination order (ref, lighting, micro), 3 levels each: the index is a
# base-3 number with ref as the most-significant digit.
for i in range(length_sequence):
    idx = int(path_produced[i])
    hmm_ref_states[i] = idx // 9                  # most-significant base-3 digit
    hmm_ref_power[i] = ref[hmm_ref_states[i]]
    hmm_lighting_states[i] = (idx // 3) % 3       # middle digit
    hmm_lighting_power[i] = lighting[hmm_lighting_states[i]]
    hmm_micro_states[i] = idx % 3                 # least-significant digit
    hmm_micro_power[i] = micro[hmm_micro_states[i]]
# --- Mains 2: actual vs. FHMM vs. CO, three stacked subplots per appliance ---
# Refrigerator.
plt.subplot(3,1,1)
ylim((0,1000))
plt.title('Actual Ref Consumption')
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_refrigerator)
plt.subplot(3,1,2)
plt.title('Predicted Ref Consumption (FHMM)')
plt.plot(downsampled_timestamp_appliance_date,hmm_ref_power);
plt.subplot(3,1,3)
plt.title('Predicted Ref Consumption (CO)')
plt.plot(downsampled_timestamp_appliance_date,co_ref_power);
# Lighting (predictions clipped to 200 W for readability).
plt.subplot(3,1,1)
plt.title('Actual Lighting Consumption')
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_lighting)
plt.subplot(3,1,2)
plt.ylim((0,200))
plt.title('Predicted Lighting Consumption (FHMM)')
plt.plot(downsampled_timestamp_appliance_date,hmm_lighting_power);
plt.subplot(3,1,3)
plt.ylim((0,200))
plt.title('Predicted Lighting Consumption (CO)')
plt.plot(downsampled_timestamp_appliance_date,co_lighting_power);
# Microwave.
plt.subplot(3,1,1)
plt.title('Actual Micro Consumption')
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_microwave)
plt.subplot(3,1,2)
plt.title('Predicted Micro Consumption (FHMM)')
plt.plot(downsampled_timestamp_appliance_date,hmm_micro_power);
plt.subplot(3,1,3)
plt.title('Predicted Micro Consumption (CO)')
plt.plot(downsampled_timestamp_appliance_date,co_micro_power);
# FHMM accuracies for Mains 2 (labels_ref / labels_micro / labels_lighting
# are the observation sequences used for EM training above).
ref_accuracy=print_confusion_matrix("Ref",len(ref),labels_ref,hmm_ref_states)
micro_accuracy=print_confusion_matrix("Micro",len(micro),labels_micro,hmm_micro_states)
lighting_accuracy=print_confusion_matrix("Lighting",len(lighting),labels_lighting,hmm_lighting_states)
We do a similar analysis for Mains 1.
# Build the FHMM for Mains 1: order (kitchen, stove, kitchen_2, dish) must
# match the state-index decoding below.
# CONSISTENCY FIX: the prior and transition matrices originally mixed in the
# INITIAL kitchen_2 parameters (kitchen_2_prior, kitchen_2_transmat) while
# every other appliance — and the kitchen_2 emission matrix — used the
# EM-learnt ones. Use the learnt parameters throughout (note the learnt prior
# variable is spelt `kichen_2_learnt_prior` where it is defined).
pie_combined_1=calculate_combined_pie([kitchen_learnt_prior,stove_learnt_prior,kichen_2_learnt_prior,dishwasher_learnt_prior])
A_combined_1=calculate_combined_A([kitchen_learnt_transmat,stove_learnt_transmat,kitchen_2_learnt_transmat,dishwasher_learnt_transmat])
B_combined_1=calculate_combined_A([kitchen_learnt_obsmat,stove_learnt_obsmat,kitchen_2_learnt_obsmat,dishwasher_learnt_obsmat])
# Viterbi decoding of the Mains 1 joint-state sequence.
viterbi_produced_path_1=path(pie_combined_1,A_combined_1,B_combined_1,states_idx)
path_produced_1=viterbi_produced_path_1[0]
# Pre-allocate FHMM per-appliance state/power arrays for Mains 1.
hmm_kitchen_actual_states=np.zeros(length_sequence,dtype=np.int)
hmm_kitchen_actual_power=np.zeros(length_sequence)
hmm_stove_actual_states=np.zeros(length_sequence,dtype=np.int)
hmm_stove_actual_power=np.zeros(length_sequence)
hmm_kitchen_2_actual_states=np.zeros(length_sequence,dtype=np.int)
hmm_kitchen_2_actual_power=np.zeros(length_sequence)
hmm_dish_actual_states=np.zeros(length_sequence,dtype=np.int)
hmm_dish_actual_power=np.zeros(length_sequence)
# Decode each Viterbi joint state index (0..35) for Mains 1 into per-appliance
# states. Combination order (kitchen, stove, kitchen_2, dish) with sizes
# (2, 2, 3, 3): mixed-radix strides are kitchen 18, stove 9, kitchen_2 3,
# dish 1.
for i in range(length_sequence):
    idx = int(path_produced_1[i])
    hmm_kitchen_actual_states[i] = idx // 18          # kitchen: 2 states, stride 18
    hmm_kitchen_actual_power[i] = kitchen[hmm_kitchen_actual_states[i]]
    hmm_stove_actual_states[i] = (idx // 9) % 2       # stove: 2 states, stride 9
    hmm_stove_actual_power[i] = stove[hmm_stove_actual_states[i]]
    hmm_kitchen_2_actual_states[i] = (idx // 3) % 3   # kitchen_2: 3 states, stride 3
    hmm_kitchen_2_actual_power[i] = kitchen_2[hmm_kitchen_2_actual_states[i]]
    hmm_dish_actual_states[i] = idx % 3               # dish: 3 states, stride 1
    hmm_dish_actual_power[i] = dish[hmm_dish_actual_states[i]]
# --- Mains 1: actual vs. FHMM vs. CO, three stacked subplots per appliance ---
# Kitchen (all three clipped to 1000 W).
plt.subplot(3,1,1)
ylim((0,1000))
plt.title('Actual Kitchen Consumption')
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_kitchen)
plt.subplot(3,1,2)
ylim((0,1000))
plt.title('Predicted Kitchen Consumption (FHMM)')
plt.plot(downsampled_timestamp_appliance_date,hmm_kitchen_actual_power);
plt.subplot(3,1,3)
ylim((0,1000))
plt.title('Predicted Kitchen Consumption (CO)')
plt.plot(downsampled_timestamp_appliance_date,co_kitchen_power);
# Kitchen 2.
plt.subplot(3,1,1)
ylim((0,1000))
plt.title('Actual Kitchen 2 Consumption')
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_kitchen_2)
plt.subplot(3,1,2)
ylim((0,1000))
plt.title('Predicted Kitchen 2 Consumption (FHMM)')
plt.plot(downsampled_timestamp_appliance_date,hmm_kitchen_2_actual_power);
plt.subplot(3,1,3)
ylim((0,1000))
plt.title('Predicted Kitchen 2 Consumption (CO)')
plt.plot(downsampled_timestamp_appliance_date,co_kitchen_2_power);
# Stove.
plt.subplot(3,1,1)
plt.title('Actual Stove Consumption')
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_stove)
plt.subplot(3,1,2)
plt.title('Predicted Stove Consumption (FHMM)')
plt.plot(downsampled_timestamp_appliance_date,hmm_stove_actual_power);
plt.subplot(3,1,3)
plt.title('Predicted Stove Consumption (CO)')
plt.plot(downsampled_timestamp_appliance_date,co_stove_power);
# Dishwasher.
plt.subplot(3,1,1)
plt.title('Actual Dishwasher Consumption')
subplots_adjust(hspace=.5)
plt.plot(downsampled_timestamp_appliance_date,downsampled_dishwasher)
plt.subplot(3,1,2)
plt.title('Predicted Dishwasher Consumption (FHMM)')
plt.plot(downsampled_timestamp_appliance_date,hmm_dish_actual_power);
plt.subplot(3,1,3)
plt.title('Predicted Dishwasher Consumption (CO)')
plt.plot(downsampled_timestamp_appliance_date,co_dish_power);
# FHMM accuracies for Mains 1.
kitchen_accuracy=print_confusion_matrix("Kitchen",len(kitchen),labels_kitchen,hmm_kitchen_actual_states)
kitchen_2_accuracy=print_confusion_matrix("Kitchen 2",len(kitchen_2),labels_kitchen_2,hmm_kitchen_2_actual_states)
stove_accuracy=print_confusion_matrix("Stove",len(stove),labels_stove,hmm_stove_actual_states)
dish_accuracy=print_confusion_matrix("Dish Washer",len(dish),labels_dish,hmm_dish_actual_states)
We now compare the accuracies of the two approaches across different mains circuits.
# Grouped bar chart comparing FHMM (red) vs. CO (green) accuracies on Mains 1.
# Accuracy values are hard-coded from the confusion-matrix outputs above.
# NOTE(review): bare `numpy` / `xticks` assume the pylab namespace (%pylab);
# under the visible imports only `np` is bound — confirm the execution mode.
plt.title('Disaggregation Accuracy for Mains 1');
plt.ylabel('Accuracy')
plt.ylim((50,100))
plt.bar( numpy.arange(4) * 2, [98.0501392758,92.9649543305,98.5489408564,97.324609704], color = 'red' );
plt.bar( numpy.arange(4) * 2 +0.8, [97.58,96.25,99.31,98.7], color = 'green' );
locs, labels = xticks();
xticks(locs+1, ('Kitchen',' ', 'Kitchen 2',' ', 'Stove',' ', 'Dishwasher'));
plt.legend(('FHMM','CO'),);
#set_xticklabels( ('G1', 'G2', 'G3', 'G4') )
# Grouped bar chart comparing FHMM (red) vs. CO (green) accuracies on Mains 2.
# Accuracy values are hard-coded from the confusion-matrix outputs above.
plt.title('Disaggregation Accuracy for Mains 2');
plt.ylabel('Accuracy')
plt.ylim((50,100))
plt.bar( numpy.arange(3) * 2, [91.03,93.03,82.8], color = 'red' );
plt.bar( numpy.arange(3) * 2 +0.8, [93.29,93.54,82.2], color = 'green' );
locs, labels = xticks();
xticks(locs+1, ('Refrigerator',' ', 'Microwave',' ', 'Lighting'));
plt.legend(('FHMM','CO'),);